diff options
| author | Botahamec <botahamec@outlook.com> | 2023-08-02 21:55:44 -0400 |
|---|---|---|
| committer | Botahamec <botahamec@outlook.com> | 2023-08-02 21:55:44 -0400 |
| commit | de4eb88e8a60b40a034bf8e21db9aa57b171a951 (patch) | |
| tree | 60433fa820af7f33d44d78fb6126fa4cd0d4d98f | |
| parent | a8ed81a707baad45808f6b800b49e6bd0d374f15 (diff) | |
Documentation
| -rw-r--r-- | src/csets.rs | 95 | ||||
| -rw-r--r-- | src/scanner.rs | 219 |
2 files changed, 312 insertions, 2 deletions
diff --git a/src/csets.rs b/src/csets.rs index bc739b2..9797d1a 100644 --- a/src/csets.rs +++ b/src/csets.rs @@ -1,8 +1,45 @@ use std::collections::HashSet; +/// An unordered set of characters +/// +/// # Example +/// +/// ``` +/// use snob::csets::CharacterSet; +/// +/// struct AsciiCharacter; +/// +/// impl CharacterSet for AsciiCharacter { +/// fn contains(&self, ch: char) -> bool { +/// ch.is_ascii() +/// } +/// } +/// ``` pub trait CharacterSet { + /// Returns `true` if the character set contains the given character. + /// + /// # Example + /// + /// ``` + /// use snob::csets::AsciiLetters; + /// + /// assert!(AsciiLetters.contains('h')); + /// assert!(!AsciiLetters.contains(' ')); + /// ``` fn contains(&self, ch: char) -> bool; + /// Returns a [`CharacterSet`] that contains the characters in the `self` + /// set, as well as any characters in the given `other` character set. + /// + /// # Example + /// + /// ``` + /// use snob::csets::AsciiLetters; + /// + /// let cset = AsciiLetters.union(' '); + /// assert!(cset.contains('h')); + /// assert!(cset.contains(' ')); + /// ``` fn union<Other: CharacterSet>(self, other: Other) -> CharacterSetUnion<Self, Other> where Self: Sized, @@ -13,6 +50,19 @@ pub trait CharacterSet { } } + /// Returns a [`CharacterSet`] that contains only the characters in both + /// of `self` and `other`. + /// + /// # Example + /// + /// ``` + /// use snob::csets::AsciiLetters; + /// + /// let cset = AsciiLetters.intersection("Hello, world"); + /// assert!(cset.contains('e')); + /// assert!(!cset.contains('a')); + /// assert!(!cset.contains(' ')); + /// ``` fn intersection<Other: CharacterSet>( self, other: Other, @@ -26,6 +76,19 @@ pub trait CharacterSet { } } + /// Returns a [`CharacterSet`] that contains the characters in the `self` + /// character set, unless they are also contained in `other`. 
+ /// + /// # Example + /// + /// ``` + /// use snob::csets::AsciiLetters; + /// + /// let cset = AsciiLetters.difference("Hello, world"); + /// assert!(cset.contains('a')); + /// assert!(!cset.contains('e')); + /// assert!(!cset.contains(' ')); + /// ``` fn difference<Other: CharacterSet>(self, other: Other) -> CharacterSetDifference<Self, Other> where Self: Sized, @@ -36,6 +99,18 @@ pub trait CharacterSet { } } + /// Returns a [`CharacterSet`] that contains all of the characters that are + /// NOT contained in the `self` character set. + /// + /// # Example + /// + /// ``` + /// use snob::csets::AsciiLetters; + /// + /// let cset = AsciiLetters.complement(); + /// assert!(!cset.contains('a')); + /// assert!(cset.contains(' ')); + /// ``` fn complement(self) -> CharacterSetComplement<Self> where Self: Sized, @@ -44,6 +119,7 @@ pub trait CharacterSet { } } +/// Contains all Unicode characters #[derive(Debug, Clone, Copy)] pub struct AnyCharacter; @@ -53,6 +129,7 @@ impl CharacterSet for AnyCharacter { } } +/// Contains all ASCII characters #[derive(Debug, Clone, Copy)] pub struct Ascii; @@ -62,6 +139,7 @@ impl CharacterSet for Ascii { } } +/// Contains the ASCII digits, 0-9 #[derive(Debug, Clone, Copy)] pub struct AsciiDigits; @@ -71,6 +149,7 @@ impl CharacterSet for AsciiDigits { } } +/// Contains all lowercase ASCII letters, a-z #[derive(Debug, Clone, Copy)] pub struct AsciiLowercase; @@ -80,6 +159,7 @@ impl CharacterSet for AsciiLowercase { } } +/// Contains all uppercase ASCII letters, A-Z #[derive(Debug, Clone, Copy)] pub struct AsciiUppercase; @@ -89,6 +169,7 @@ impl CharacterSet for AsciiUppercase { } } +/// Contains all ASCII letters: a-z, A-Z #[derive(Debug, Clone, Copy)] pub struct AsciiLetters; @@ -122,6 +203,9 @@ impl CharacterSet for HashSet<char> { } } +/// A union of two [`CharacterSet`]s. +/// +/// This is created by calling [`CharacterSet::union`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct CharacterSetUnion<A: CharacterSet, B: CharacterSet> { first: A, @@ -134,6 +218,9 @@ impl<A: CharacterSet, B: CharacterSet> CharacterSet for CharacterSetUnion<A, B> } } +/// An intersection of two [`CharacterSet`]s. +/// +/// This is created by calling [`CharacterSet::intersection`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct CharacterSetIntersection<A: CharacterSet, B: CharacterSet> { first: A, @@ -146,8 +233,10 @@ impl<A: CharacterSet, B: CharacterSet> CharacterSet for CharacterSetIntersection } } +/// The difference of two [`CharacterSet`]s. +/// +/// This is created by calling [`CharacterSet::difference`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct CharacterSetDifference<A: CharacterSet, B: CharacterSet> { first: A, second: B, @@ -159,8 +248,10 @@ impl<A: CharacterSet, B: CharacterSet> CharacterSet for CharacterSetDifference<A } } +/// The complement of a [`CharacterSet`]. +/// +/// This is created by calling [`CharacterSet::complement`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct CharacterSetComplement<Inner: CharacterSet> { inner: Inner, } diff --git a/src/scanner.rs b/src/scanner.rs index f2c4a2d..475098c 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,5 +1,18 @@ use crate::csets::CharacterSet; +/// This is used to analyze strings. It can be initialized using either +/// [`Scanner::from`] or [`Scanner::new`]. +/// +/// # Example +/// +/// ``` +/// use snob::Scanner; +/// +/// let mut scanner = Scanner::new("Hello, world!"); +/// if let Some(position) = scanner.starts_with("Hello") { +/// scanner.goto(position); +/// } +/// ``` #[derive(Debug, Clone)] pub struct Scanner { source: Box<[char]>, @@ -7,6 +20,15 @@ pub struct Scanner { } impl Scanner { + /// Create a new Scanner with a given source.
+ /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// let scanner = Scanner::new("Hello, world!"); + /// ``` pub fn new(source: impl AsRef<str>) -> Self { Self { source: source.as_ref().chars().collect(), @@ -14,30 +36,117 @@ impl Scanner { } } + /// Get the full source being used in this scanner, as a slice of characters + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// let scanner = Scanner::new("Hello, world!"); + /// let source = scanner.source().iter().collect::<String>(); + /// assert_eq!(source, "Hello, world!"); + /// ``` pub fn source(&self) -> &[char] { &self.source } + /// Get the full length of the source being used in this scanner + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// let scanner = Scanner::new("Hello, world!"); + /// assert_eq!(scanner.len(), 13); + /// ``` pub fn len(&self) -> usize { self.source.len() } + /// Returns `true` if the scanner's source is an empty string + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// let scanner = Scanner::new("Hello, world!"); + /// assert!(!scanner.is_empty()); + /// ``` pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Get the character at a given position in the string. + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// let scanner = Scanner::new("Hello, world!"); + /// assert_eq!(scanner.char_at(1), Some('e')); + /// ``` pub fn char_at(&self, index: usize) -> Option<char> { self.source.get(index).cloned() } + /// Get the current position in the string. When the [`Scanner`] is + /// created, this value is zero.
+ /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// let mut scanner = Scanner::new("Hello, world!"); + /// assert_eq!(scanner.position(), 0); + /// scanner.advance(5); + /// assert_eq!(scanner.position(), 5); + /// scanner.goto(3); + /// assert_eq!(scanner.position(), 3); + /// ``` pub fn position(&self) -> usize { self.position } + /// Returns true if the scanner's position has reached the end of its + /// source. + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// let mut scanner = Scanner::new("Hello, world!"); + /// assert!(!scanner.is_at_end()); + /// + /// if let Some(position) = scanner.starts_with("Hello, world!") { + /// scanner.goto(position); + /// } + /// + /// assert!(scanner.is_at_end()); + /// ``` pub fn is_at_end(&self) -> bool { self.position == self.source.len() } + /// Set the scanner's `position`. If the position is out of range of the + /// source, then `None` is returned. Otherwise, the subslice from the old + /// position to the new position is returned. If the latter is less than + /// the former, then the string is reversed. + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// let mut scanner = Scanner::new("Hello, world!"); + /// scanner.goto(3); + /// assert_eq!(scanner.position(), 3); + /// ``` pub fn goto(&mut self, position: usize) -> Option<String> { // allow reverse ranges let production = if self.position < position { @@ -54,11 +163,40 @@ impl Scanner { Some(production) } + /// Increase the position by the given `amount`. If the new position is out + /// of the range of the source, then `None` is returned. Otherwise, the + /// subslice from the old position to the new position is returned. If the + /// latter is less than the former, then the string is reversed.
+ /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// let mut scanner = Scanner::new("Hello, world!"); + /// scanner.advance(5); + /// assert_eq!(scanner.position(), 5); + /// ``` pub fn advance(&mut self, amount: isize) -> Option<String> { let position = self.position.checked_add_signed(amount)?; self.goto(position) } + /// Looks for the given `substring` in the remainder of the scanner. If the + /// substring is found, the position of the first character in the + /// substring is returned. Otherwise, `None` is returned. + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// # fn foo() -> Option<()> { + /// let scanner = Scanner::new("Hello, world!"); + /// let position = scanner.find_substring("lo")?; + /// assert_eq!(position, 3); + /// # Some(()) + /// # } pub fn find_substring(&self, substring: impl AsRef<str>) -> Option<usize> { self.source .get(self.position..)? @@ -67,6 +205,21 @@ impl Scanner { .find(substring.as_ref()) } + /// If `source[position..]` starts with the given string, then this returns + /// the ending position of the substring. Otherwise, `None` is returned. + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// # fn foo() -> Option<()> { + /// let scanner = Scanner::new("Hello, world!"); + /// let position = scanner.starts_with("Hello")?; + /// assert_eq!(position, 5); + /// # Some(()) + /// # } + /// ``` pub fn starts_with(&self, substring: impl AsRef<str>) -> Option<usize> { let mut i = self.position; for substring_char in substring.as_ref().chars() { @@ -79,16 +232,66 @@ impl Scanner { Some(i) } + /// If `source[position..]` starts with the given string, then this returns + /// a copy of the substring. Otherwise, `None` is returned. This is the + /// equivalent of: `self.goto(self.starts_with(substring)?)`. 
+ /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// # fn foo() -> Option<()> { + /// let mut scanner = Scanner::new("Hello, world!"); + /// let substring = scanner.advance_if_starts_with("Hello")?; + /// assert_eq!(substring, "Hello"); + /// assert_eq!(scanner.position(), 5); + /// # Some(()) + /// # } + /// ``` pub fn advance_if_starts_with(&mut self, substring: impl AsRef<str>) -> Option<String> { let position = self.starts_with(substring)?; self.goto(position) } + /// If the next character in the scanner is contained in the given `cset`, + /// then the position after the next character is returned. Otherwise, + /// `None` is returned. + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// # fn foo() -> Option<()> { + /// let scanner = Scanner::new("Hello, world!"); + /// let position = scanner.any('H')?; + /// assert_eq!(position, 1); + /// # Some(()) + /// # } + /// ``` pub fn any(&self, cset: impl CharacterSet) -> Option<usize> { cset.contains(*self.source.get(self.position)?) .then_some(self.position + 1) } + /// If the next character in the scanner is contained in the given `cset`, + /// then the position after the longest initial sequence of characters in + /// `cset` is returned. Otherwise, `None` is returned. + /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// use snob::csets::AsciiLetters; + /// + /// # fn foo() -> Option<()> { + /// let scanner = Scanner::new("Hello, world!"); + /// let position = scanner.many(AsciiLetters)?; + /// assert_eq!(position, 5); + /// # Some(()) + /// # } + /// ``` pub fn many(&self, cset: impl CharacterSet) -> Option<usize> { if !cset.contains(*self.source.get(self.position)?) { return None; @@ -102,6 +305,22 @@ impl Scanner { Some(i) } + /// If the remainder of the scanner contains a character from the given + /// `cset`, then the position of the aforementioned character is returned. + /// Otherwise, `None` is returned. 
+ /// + /// # Example + /// + /// ``` + /// use snob::Scanner; + /// + /// # fn foo() -> Option<()> { + /// let scanner = Scanner::new("Hello, world!"); + /// let position = scanner.upto(' ')?; + /// assert_eq!(position, 6); + /// # Some(()) + /// # } + /// ``` pub fn upto(&self, cset: impl CharacterSet) -> Option<usize> { let mut i = self.position; while !cset.contains(*self.source.get(i)?) { |
