Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/aff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,7 @@ pub(crate) struct AffOptions {
pub forbid_warn: bool,
pub only_in_compound_flag: Option<Flag>,
pub circumfix_flag: Option<Flag>,
pub forbidden_word_flag: Option<Flag>,
pub forbidden_word_flag: Flag,
pub keep_case_flag: Option<Flag>,
pub need_affix_flag: Option<Flag>,
pub warn_flag: Option<Flag>,
Expand Down Expand Up @@ -1267,13 +1267,18 @@ pub(crate) struct AffOptions {
impl Default for AffOptions {
fn default() -> Self {
Self {
// Hunspell:
// // default flags
// #define DEFAULTFLAGS 65510
// #define FORBIDDENWORD 65510
// #define ONLYUPCASEFLAG 65511
complex_prefixes: Default::default(),
fullstrip: Default::default(),
checksharps: Default::default(),
forbid_warn: Default::default(),
only_in_compound_flag: Default::default(),
circumfix_flag: Default::default(),
forbidden_word_flag: Default::default(),
forbidden_word_flag: Flag::new(65510).unwrap(),
keep_case_flag: Default::default(),
need_affix_flag: Default::default(),
warn_flag: Default::default(),
Expand Down
9 changes: 3 additions & 6 deletions src/aff/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,7 @@ pub(crate) fn parse<'aff, 'dic, S: BuildHasher + Clone>(
// converting them to titlecase and setting the hidden homonym flag.
let casing = crate::classify_casing(word.as_ref());
if (matches!(casing, Casing::Pascal | Casing::Camel)
&& !cx
.options
.forbidden_word_flag
.is_some_and(|flag| flagset.contains(&flag)))
&& !flagset.contains(&cx.options.forbidden_word_flag))
|| (matches!(casing, Casing::All) && !flagset.is_empty())
{
let word = cx.options.case_handling.titlecase(word.as_ref()).into();
Expand Down Expand Up @@ -253,7 +250,7 @@ fn parse_flag_type(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult {
fn parse_forbidden_word_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult {
lines
.parse_flag(cx)
.map(|flag| cx.options.forbidden_word_flag = Some(flag))
.map(|flag| cx.options.forbidden_word_flag = flag)
}

fn parse_circumfix_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult {
Expand Down Expand Up @@ -2102,7 +2099,7 @@ mod test {
FORCEUCASE p
"#;
let (_words, aff_data) = parse(aff, dic, DefaultHashBuilder::default()).unwrap();
assert_eq!(aff_data.options.forbidden_word_flag, Some(flag!('a')));
assert_eq!(aff_data.options.forbidden_word_flag, flag!('a'));
assert_eq!(aff_data.options.circumfix_flag, Some(flag!('b')));
assert_eq!(aff_data.options.keep_case_flag, Some(flag!('c')));
assert_eq!(aff_data.options.need_affix_flag, Some(flag!('d')));
Expand Down
17 changes: 13 additions & 4 deletions src/checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ impl<'a, S: BuildHasher> Checker<'a, S> {
const MAX_DEPTH: usize = 9;

if let Some(flags) = &self.spell_casing(word) {
if has_flag!(flags, self.aff.options.forbidden_word_flag) {
if flags.contains(&self.aff.options.forbidden_word_flag) {
return false;
}

Expand Down Expand Up @@ -1444,7 +1444,10 @@ impl<'a, S: BuildHasher> Checker<'a, S> {
// TODO: investigate just using a big chain of `||`s.

let part1_entry = self.check_word_in_compound::<MODE>(&word[start_pos..i])?;
if has_flag!(part1_entry.flags, self.aff.options.forbidden_word_flag) {
if part1_entry
.flags
.contains(&self.aff.options.forbidden_word_flag)
{
return None;
}
if self.aff.options.compound_check_triple && are_three_chars_equal(word, i) {
Expand All @@ -1469,7 +1472,10 @@ impl<'a, S: BuildHasher> Checker<'a, S> {
part1_entry,
);
};
if has_flag!(part2_entry.flags, self.aff.options.forbidden_word_flag) {
if part2_entry
.flags
.contains(&self.aff.options.forbidden_word_flag)
{
return self.check_compound_classic_try_recursive(
word,
start_pos,
Expand Down Expand Up @@ -1675,7 +1681,10 @@ impl<'a, S: BuildHasher> Checker<'a, S> {
part1_entry,
);
};
if has_flag!(part2_entry.flags, self.aff.options.forbidden_word_flag) {
if part2_entry
.flags
.contains(&self.aff.options.forbidden_word_flag)
{
return self.check_compound_classic_try_simplified_triple_recursive(
word_with_triple,
word,
Expand Down
74 changes: 69 additions & 5 deletions src/hash_bag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use core::{
hash::{BuildHasher, Hash},
};

use hashbrown::hash_table::{self, HashTable, IterHash};
use hashbrown::hash_table::{self, HashTable};

/// A collection of key-value pairs - similar to a HashMap - which allows for duplicate keys.
///
Expand Down Expand Up @@ -84,6 +84,22 @@ where
key: k,
}
}

/// Returns an iterator over the values of every pair in the table stored under the given key.
///
/// This behaves like `get`, but the values are yielded as exclusive (`&mut`) borrows.
pub fn get_mut<'bag, 'key, Q>(&'bag mut self, k: &'key Q) -> GetAllMutIter<'bag, 'key, Q, K, V>
where
    K: Borrow<Q>,
    Q: Hash + Eq + ?Sized,
{
    // The table iterator is driven by the key's hash only; the returned iterator compares
    // each visited pair's key against `k` before yielding its value.
    let inner = self
        .table
        .iter_hash_mut(make_hash(&self.build_hasher, k));

    GetAllMutIter { inner, key: k }
}
}

impl<K, V, S> Debug for HashBag<K, V, S>
Expand Down Expand Up @@ -148,7 +164,7 @@ where
K: Borrow<Q>,
Q: Hash + Eq,
{
inner: IterHash<'bag, (K, V)>,
inner: hash_table::IterHash<'bag, (K, V)>,
key: &'key Q,
}

Expand All @@ -174,6 +190,37 @@ where
}
}

/// Iterator over exclusive borrows of the values whose key is equal to `key`.
///
/// Created by `HashBag::get_mut`.
pub struct GetAllMutIter<'bag, 'key, Q, K, V>
where
    K: Borrow<Q>,
    Q: Hash + Eq + ?Sized,
{
    /// Table iterator over the pairs selected by the looked-up key's hash.
    inner: hash_table::IterHashMut<'bag, (K, V)>,
    /// The key that a pair must be equal to for its value to be yielded.
    key: &'key Q,
}

impl<'bag, Q: ?Sized, K, V> Iterator for GetAllMutIter<'bag, '_, Q, K, V>
where
    K: Borrow<Q>,
    Q: Hash + Eq,
{
    type Item = &'bag mut V;

    /// Yields the value of the next pair whose key equals the looked-up key, or `None` once
    /// the underlying table iterator is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // Copy the key reference out so the closure does not need to borrow `self` while
        // `self.inner` is being advanced.
        let wanted = self.key;

        // Pairs visited by the inner iterator are skipped unless their key compares equal.
        self.inner
            .find_map(|(k, v)| wanted.eq((*k).borrow()).then_some(v))
    }
}

#[cfg(test)]
mod test {
use core::hash::BuildHasher;
Expand Down Expand Up @@ -246,17 +293,17 @@ mod test {
#[test]
fn iter() {
// The iterator is currently unused but very small and could be useful for debugging.
let pairs = &[(1, 1), (1, 2), (1, 3), (3, 1)];
let pairs = [(1, 1), (1, 2), (1, 3), (3, 1)];
let mut bag = HashBag::new();
for (k, v) in pairs {
bag.insert(k, v);
}

assert_eq!(bag.iter().len(), pairs.len());

let mut values: Vec<_> = bag.iter().map(|(k, v)| (**k, **v)).collect();
let mut values: Vec<_> = bag.iter().map(|(k, v)| (*k, *v)).collect();
values.sort_unstable();
assert_eq!(&values, pairs);
assert_eq!(values, pairs);
}

#[test]
Expand All @@ -277,4 +324,21 @@ mod test {
crate::alloc::format!("{bag:?}").as_str()
);
}

#[test]
fn mutate() {
    let mut bag = HashBag::new();
    for (key, value) in [(1, 1), (1, 2), (1, 3), (3, 1)] {
        bag.insert(key, value);
    }

    // Double every value stored under key `1`; the pair under key `3` must stay untouched.
    bag.get_mut(&1).for_each(|value| *value *= 2);

    let mut after: Vec<_> = bag.iter().map(|(k, v)| (*k, *v)).collect();
    after.sort_unstable();
    assert_eq!(after, [(1, 2), (1, 4), (1, 6), (3, 1)]);
}
}
41 changes: 41 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,47 @@ impl<S: BuildHasher> Dictionary<S> {
self.words.insert(word, flagset);
Ok(())
}

/// Removes the given stem from the dictionary.
///
/// After removal, `check` returns `false` for every conjugation of the stem. For example,
/// "adventuring" in the `en_US` dictionary is derived from the stem "adventure" with the
/// "ing" suffix applied. Removing "adventuring" does nothing, while removing "adventure"
/// also removes "adventuring", "adventured", etc.
///
/// Returns `true` if any stem in the dictionary was removed, otherwise `false`.
///
/// # Example
///
/// ```
/// let aff = std::fs::read_to_string("./vendor/en_US/en_US.aff").unwrap();
/// let dic = std::fs::read_to_string("./vendor/en_US/en_US.dic").unwrap();
/// let mut dict = spellbook::Dictionary::new(&aff, &dic).unwrap();
///
/// assert!(dict.check("adventure"));
/// assert!(dict.check("adventuring"));
///
/// // `remove_stem` only works on stems.
/// assert!(!dict.remove_stem("adventuring"));
/// assert!(dict.check("adventure"));
/// assert!(dict.check("adventuring"));
/// // Removing the stem removes all conjugations.
/// assert!(dict.remove_stem("adventure"));
/// assert!(!dict.check("adventure"));
/// assert!(!dict.check("adventuring"));
/// // Once removed, removing the same stem again is a no-op.
/// assert!(!dict.remove_stem("adventure"));
/// ```
pub fn remove_stem(&mut self, word: &str) -> bool {
    let forbidden = self.aff_data.options.forbidden_word_flag;
    let mut removed_any = false;

    // Entries are not deleted from the word list. Each matching entry's flagset is replaced
    // by one that also carries the forbidden word flag.
    for flagset in self.words.get_mut(word) {
        if flagset.contains(&forbidden) {
            // Already forbidden: this entry does not count as a removal.
            continue;
        }
        *flagset = flagset.with_flag(forbidden);
        removed_any = true;
    }

    removed_any
}
}

impl fmt::Debug for Dictionary {
Expand Down
16 changes: 10 additions & 6 deletions src/suggester.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ impl<'a, S: BuildHasher> Suggester<'a, S> {
.checker
.check_word(&buffer, Forceucase::default(), HiddenHomonym::default())
.is_some_and(|flags| {
has_flag!(flags, self.checker.aff.options.forbidden_word_flag)
flags.contains(&self.checker.aff.options.forbidden_word_flag)
})
{
out.push(buffer.clone());
Expand Down Expand Up @@ -278,10 +278,14 @@ impl<'a, S: BuildHasher> Suggester<'a, S> {
// > Suggest with dots can go here but nobody uses it so no point in
// > implementing it.

if matches!(casing, Casing::Init | Casing::All)
&& (self.checker.aff.options.keep_case_flag.is_some()
|| self.checker.aff.options.forbidden_word_flag.is_some())
{
// NOTE: Both Hunspell and Nuspell check that the forbidden word flag is not zero (None)
// like the following code:
// if matches!(casing, Casing::Init | Casing::All)
// && (self.checker.aff.options.keep_case_flag.is_some()
// || self.checker.aff.options.forbidden_word_flag.is_some())
// However, the forbidden word flag has a non-zero default here, so the right-hand side of
// that `&&` would always be `true`. Only the casing check is kept.
if matches!(casing, Casing::Init | Casing::All) {
// Happily this is cleaner in Rust because of `retain_mut`. Nuspell inlines
// `remove_if(it, last, is_not_ok)` because it needs to modify the suggestions.
out.retain_mut(|suggestion| {
Expand Down Expand Up @@ -377,7 +381,7 @@ impl<'a, S: BuildHasher> Suggester<'a, S> {
return false;
};

if has_flag!(flags, self.checker.aff.options.forbidden_word_flag) {
if flags.contains(&self.checker.aff.options.forbidden_word_flag) {
return false;
}

Expand Down
2 changes: 1 addition & 1 deletion src/suggester/ngram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl<S: BuildHasher> Suggester<'_, S> {
let mut lowercase_stem_buf = Vec::with_capacity(stem_buf.len());
let mut roots = BinaryHeap::with_capacity(100);
for entry @ (stem, flagset) in self.checker.words.iter() {
if has_flag!(flagset, self.checker.aff.options.forbidden_word_flag)
if flagset.contains(&self.checker.aff.options.forbidden_word_flag)
|| has_flag!(flagset, self.checker.aff.options.no_suggest_flag)
|| has_flag!(flagset, self.checker.aff.options.only_in_compound_flag)
|| flagset.contains(&HIDDEN_HOMONYM_FLAG)
Expand Down