This commit is contained in:
2024-01-25 21:51:23 +00:00
parent 57d216cdab
commit 7b6a16b662
13 changed files with 493 additions and 0 deletions

244
element_words_rs/src/lib.rs Normal file
View File

@@ -0,0 +1,244 @@
use itertools::Itertools;
/// One contiguous piece of a word, tagged by whether it spells an element symbol.
///
/// [`partial_elname`] returns a sequence of these pieces in word order.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum ElementSubstring {
    /// A chemical element symbol in its table capitalisation, e.g. `"Ba"`.
    Element(String),
    /// A run of characters (taken from the lower-cased word) that matched no symbol.
    NonElement(String),
}
/// Returns the total number of characters held by all pieces of `elword`.
///
/// Characters are counted as Unicode scalar values (`str::chars`), not bytes,
/// and `Element` and `NonElement` pieces contribute alike.
fn elementsubstringvec_length(elword: &[ElementSubstring]) -> usize {
    elword
        .iter()
        .map(|piece| match piece {
            // Both variants wrap a `String`; only its character count matters here.
            ElementSubstring::Element(text) | ElementSubstring::NonElement(text) => {
                text.chars().count()
            }
        })
        .sum()
}
/// Every one-letter chemical element symbol, upper-cased as written on the periodic table.
///
/// Lookups compare the lower-cased form of each entry against a lower-cased input character.
const SINGLE_CHAR_ELEMENTS: &[&str] = &[
    "B", "C", "F", "H", "I", "N", "O", "P", "K", "S", "W", "U", "V", "Y",
];
/// Every two-letter chemical element symbol, capitalised as on the periodic table (`"Ac"`, `"Al"`, ...).
///
/// The entries are ordered alphabetically by element *name* (antimony's `"Sb"` sits between
/// `"Am"` and `"Ar"`), not by symbol, so the table cannot be binary-searched by symbol as-is.
const DOUBLE_CHAR_ELEMENTS: &[&str] = &[
    "Ac", "Al", "Am", "Sb", "Ar", "As", "At", "Ba", "Bk", "Be", "Bi", "Bh", "Br", "Cd", "Cs", "Ca",
    "Cf", "Ce", "Cl", "Cr", "Co", "Cn", "Cu", "Cm", "Ds", "Db", "Dy", "Es", "Er", "Eu", "Fm", "Fl",
    "Fr", "Gd", "Ga", "Ge", "Au", "Hf", "Hs", "He", "Ho", "In", "Ir", "Fe", "Kr", "La", "Lr", "Pb",
    "Li", "Lv", "Lu", "Mg", "Mn", "Mt", "Md", "Hg", "Mo", "Mc", "Nd", "Ne", "Np", "Ni", "Nh", "Nb",
    "No", "Og", "Os", "Pd", "Pt", "Pu", "Po", "Pr", "Pm", "Pa", "Ra", "Rn", "Re", "Rh", "Rg", "Rb",
    "Ru", "Rf", "Sm", "Sc", "Sg", "Se", "Si", "Ag", "Na", "Sr", "Ta", "Tc", "Te", "Ts", "Tb", "Tl",
    "Th", "Tm", "Sn", "Ti", "Xe", "Yb", "Zn", "Zr",
];
/// Greedily splits `word` into element symbols and the leftover text between them.
///
/// The word is lower-cased and scanned left to right. At each position a
/// two-letter symbol from `DOUBLE_CHAR_ELEMENTS` is preferred over a one-letter
/// symbol from `SINGLE_CHAR_ELEMENTS`; characters that match neither are
/// accumulated and emitted as one [`ElementSubstring::NonElement`] run.
///
/// The scan never backtracks, so `"bar"` yields `Ba` followed by `"r"` rather
/// than `B` followed by `Ar`.
///
/// An empty `word` yields an empty vector.
pub fn partial_elname(word: &str) -> Vec<ElementSubstring> {
    // Finds the one-letter symbol spelled by the (lower-case) character `c`.
    fn single_symbol(c: char) -> Option<&'static str> {
        SINGLE_CHAR_ELEMENTS
            .iter()
            .copied()
            .find(|symbol| symbol.chars().flat_map(char::to_lowercase).next() == Some(c))
    }

    // Emits the pending non-element run, if there is one, and leaves it empty.
    fn flush_pending(elword: &mut Vec<ElementSubstring>, pending: &mut String) {
        if !pending.is_empty() {
            elword.push(ElementSubstring::NonElement(std::mem::take(pending)));
        }
    }

    // Emits `symbol` as an element, closing off any non-element run before it.
    fn push_element(elword: &mut Vec<ElementSubstring>, pending: &mut String, symbol: &str) {
        flush_pending(elword, pending);
        elword.push(ElementSubstring::Element(symbol.to_string()));
    }

    let word = word.to_lowercase();
    let mut elword = Vec::new();

    // Nothing to split; this also keeps the last-character handling below
    // from ever running on an empty word.
    let last_char = match word.chars().last() {
        Some(c) => c,
        None => return elword,
    };

    let mut pending = String::new();
    let mut skip_next_window = false;
    for (prev, next) in word.chars().tuple_windows() {
        // The previous window matched a two-letter symbol, so `prev` is
        // already consumed as that symbol's second letter.
        if skip_next_window {
            skip_next_window = false;
            continue;
        }

        // Table entries are capitalised ("Ba"), so rebuild that shape from the pair.
        let mut candidate: String = prev.to_uppercase().collect();
        candidate.push(next);
        let double_symbol = DOUBLE_CHAR_ELEMENTS
            .iter()
            .copied()
            .find(|symbol| *symbol == candidate.as_str());

        if let Some(symbol) = double_symbol {
            push_element(&mut elword, &mut pending, symbol);
            skip_next_window = true;
        } else if let Some(symbol) = single_symbol(prev) {
            push_element(&mut elword, &mut pending, symbol);
        } else {
            pending.push(prev);
        }
    }

    // The windows only ever classify their first character, so the last
    // character is still unhandled unless the final window matched a two-letter
    // symbol. In that one case the pending run was flushed before the push and
    // `elword` already accounts for every character of the word. This covers
    // both "crisp" (last window is two one-letter elements) and "ban" (the last
    // window was skipped), as well as one-character words with no windows.
    if elementsubstringvec_length(&elword) < word.chars().count() {
        match single_symbol(last_char) {
            Some(symbol) => push_element(&mut elword, &mut pending, symbol),
            None => pending.push(last_char),
        }
    }

    flush_pending(&mut elword, &mut pending);
    elword
}
pub fn complete_elname(word: &str) -> Option<Vec<String>> {
let mut elword: Vec<String> = vec![];
let word = word.to_lowercase();
// The thing feels a bit un-rust-y, but does seem to work!
let mut skip_next_loop = false;
for (prev, next) in word.chars().tuple_windows() {
if skip_next_loop {
skip_next_loop = false;
continue;
}
let potential_el: String = prev.to_string().to_uppercase() + &next.to_string();
let mut found_match = false;
// Can probably short circuit slightly here based on ASCII value or something (if in alphabetical order)
for element in DOUBLE_CHAR_ELEMENTS {
if element == &potential_el {
elword.push(element.to_string());
skip_next_loop = true;
found_match = true;
break;
}
}
if found_match {
continue;
}
for element in SINGLE_CHAR_ELEMENTS {
if element.to_lowercase().chars().next().unwrap() == prev {
elword.push(element.to_string());
found_match = true;
break;
}
}
if !found_match {
return None;
}
}
// This gets triggered when either:
// - the last window is 2 valid Elements as in "CrISP", or
// - the second to last window is a 2 letter Element (and would be skipped) as in "BaN"
let mut num_chars = 0;
for els in &elword {
num_chars += els.chars().count();
}
if num_chars == word.chars().count() - 1 {
let mut found_match = false;
let last_char = word.chars().last().unwrap();
for element in SINGLE_CHAR_ELEMENTS {
if element.to_lowercase().chars().nth(0).unwrap() == last_char {
elword.push(element.to_string());
found_match = true;
break;
}
}
if !found_match {
return None;
}
}
Some(elword)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Builds the successful result of `complete_elname` from plain symbol literals.
    fn spelled(symbols: &[&str]) -> Option<Vec<String>> {
        Some(symbols.iter().map(|symbol| symbol.to_string()).collect())
    }

    // Builds an `Element` piece from a symbol literal.
    fn el(symbol: &str) -> ElementSubstring {
        ElementSubstring::Element(symbol.to_string())
    }

    // Builds a `NonElement` piece from a text literal.
    fn non(text: &str) -> ElementSubstring {
        ElementSubstring::NonElement(text.to_string())
    }

    // Words are accepted only when the greedy scan consumes every character.
    #[test]
    fn test_complete_elname() {
        assert_eq!(complete_elname("a"), None);
        assert_eq!(complete_elname("bar"), None);
        assert_eq!(complete_elname("zee"), None);
        assert_eq!(complete_elname("ge"), spelled(&["Ge"]));
        assert_eq!(complete_elname("cras"), spelled(&["Cr", "As"]));
        assert_eq!(complete_elname("ban"), spelled(&["Ba", "N"]));
        assert_eq!(complete_elname("arthur"), None);
        assert_eq!(complete_elname("crisp"), spelled(&["Cr", "I", "S", "P"]));
        assert_eq!(complete_elname("accepting"), None);
    }

    // Unmatched characters are kept as `NonElement` runs instead of failing the word.
    #[test]
    fn test_partial_elname() {
        assert_eq!(partial_elname("a"), vec![non("a")]);
        assert_eq!(partial_elname("bar"), vec![el("Ba"), non("r")]);
        assert_eq!(partial_elname("zee"), vec![non("zee")]);
        assert_eq!(partial_elname("ge"), vec![el("Ge")]);
        assert_eq!(
            partial_elname("crisp"),
            vec![el("Cr"), el("I"), el("S"), el("P")]
        );
        assert_eq!(
            partial_elname("accepting"),
            vec![el("Ac"), el("Ce"), el("Pt"), el("In"), non("g")]
        );
    }
}