summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/csets.rs95
-rw-r--r--src/scanner.rs219
2 files changed, 312 insertions, 2 deletions
diff --git a/src/csets.rs b/src/csets.rs
index bc739b2..9797d1a 100644
--- a/src/csets.rs
+++ b/src/csets.rs
@@ -1,8 +1,45 @@
use std::collections::HashSet;
+/// An unordered set of characters
+///
+/// # Example
+///
+/// ```
+/// use snob::csets::CharacterSet;
+///
+/// struct AsciiCharacter;
+///
+/// impl CharacterSet for AsciiCharacter {
+/// fn contains(&self, ch: char) -> bool {
+/// ch.is_ascii()
+/// }
+/// }
+/// ```
pub trait CharacterSet {
+ /// Returns `true` if the character set contains the given character.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::csets::{AsciiLetters, CharacterSet};
+ ///
+ /// assert!(AsciiLetters.contains('h'));
+ /// assert!(!AsciiLetters.contains(' '));
+ /// ```
fn contains(&self, ch: char) -> bool;
+ /// Returns a [`CharacterSet`] that contains the characters in the `self`
+ /// set, as well as any characters in the given `other` character set.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::csets::{AsciiLetters, CharacterSet};
+ ///
+ /// let cset = AsciiLetters.union(' ');
+ /// assert!(cset.contains('h'));
+ /// assert!(cset.contains(' '));
+ /// ```
fn union<Other: CharacterSet>(self, other: Other) -> CharacterSetUnion<Self, Other>
where
Self: Sized,
@@ -13,6 +50,19 @@ pub trait CharacterSet {
}
}
+ /// Returns a [`CharacterSet`] that contains only the characters in both
+ /// of `self` and `other`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::csets::{AsciiLetters, CharacterSet};
+ ///
+ /// let cset = AsciiLetters.intersection("Hello, world");
+ /// assert!(cset.contains('e'));
+ /// assert!(!cset.contains('a'));
+ /// assert!(!cset.contains(' '));
+ /// ```
fn intersection<Other: CharacterSet>(
self,
other: Other,
@@ -26,6 +76,19 @@ pub trait CharacterSet {
}
}
+ /// Returns a [`CharacterSet`] that contains the characters in the `self`
+ /// character set, unless they are also contained in `other`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::csets::{AsciiLetters, CharacterSet};
+ ///
+ /// let cset = AsciiLetters.difference("Hello, world");
+ /// assert!(cset.contains('a'));
+ /// assert!(!cset.contains('e'));
+ /// assert!(!cset.contains(' '));
+ /// ```
fn difference<Other: CharacterSet>(self, other: Other) -> CharacterSetDifference<Self, Other>
where
Self: Sized,
@@ -36,6 +99,18 @@ pub trait CharacterSet {
}
}
+ /// Returns a [`CharacterSet`] that contains all of the characters that are
+ /// NOT contained in the `self` character set.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::csets::{AsciiLetters, CharacterSet};
+ ///
+ /// let cset = AsciiLetters.complement();
+ /// assert!(!cset.contains('a'));
+ /// assert!(cset.contains(' '));
+ /// ```
fn complement(self) -> CharacterSetComplement<Self>
where
Self: Sized,
@@ -44,6 +119,7 @@ pub trait CharacterSet {
}
}
+/// Contains all Unicode characters
#[derive(Debug, Clone, Copy)]
pub struct AnyCharacter;
@@ -53,6 +129,7 @@ impl CharacterSet for AnyCharacter {
}
}
+/// Contains all ASCII characters
#[derive(Debug, Clone, Copy)]
pub struct Ascii;
@@ -62,6 +139,7 @@ impl CharacterSet for Ascii {
}
}
+/// Contains the ASCII digits, 0-9
#[derive(Debug, Clone, Copy)]
pub struct AsciiDigits;
@@ -71,6 +149,7 @@ impl CharacterSet for AsciiDigits {
}
}
+/// Contains all lowercase ASCII letters, a-z
#[derive(Debug, Clone, Copy)]
pub struct AsciiLowercase;
@@ -80,6 +159,7 @@ impl CharacterSet for AsciiLowercase {
}
}
+/// Contains all uppercase ASCII letters, A-Z
#[derive(Debug, Clone, Copy)]
pub struct AsciiUppercase;
@@ -89,6 +169,7 @@ impl CharacterSet for AsciiUppercase {
}
}
+/// Contains all ASCII letters: a-z, A-Z
#[derive(Debug, Clone, Copy)]
pub struct AsciiLetters;
@@ -122,6 +203,9 @@ impl CharacterSet for HashSet<char> {
}
}
+/// A union of two [`CharacterSet`]s.
+///
+/// This is created by calling [`CharacterSet::union`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct CharacterSetUnion<A: CharacterSet, B: CharacterSet> {
first: A,
@@ -134,6 +218,9 @@ impl<A: CharacterSet, B: CharacterSet> CharacterSet for CharacterSetUnion<A, B>
}
}
+/// An intersection of two [`CharacterSet`]s.
+///
+/// This is created by calling [`CharacterSet::intersection`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct CharacterSetIntersection<A: CharacterSet, B: CharacterSet> {
first: A,
@@ -146,8 +233,10 @@ impl<A: CharacterSet, B: CharacterSet> CharacterSet for CharacterSetIntersection
}
}
+/// The difference of two [`CharacterSet`]s.
+///
+/// This is created by calling [`CharacterSet::difference`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-
pub struct CharacterSetDifference<A: CharacterSet, B: CharacterSet> {
first: A,
second: B,
@@ -159,8 +248,10 @@ impl<A: CharacterSet, B: CharacterSet> CharacterSet for CharacterSetDifference<A
}
}
+/// The complement of a [`CharacterSet`].
+///
+/// This is created by calling [`CharacterSet::complement`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-
pub struct CharacterSetComplement<Inner: CharacterSet> {
inner: Inner,
}
diff --git a/src/scanner.rs b/src/scanner.rs
index f2c4a2d..475098c 100644
--- a/src/scanner.rs
+++ b/src/scanner.rs
@@ -1,5 +1,18 @@
use crate::csets::CharacterSet;
+/// This is used to analyze strings. It can be initialized using either
+/// [`Scanner::from`] or [`Scanner::new`].
+///
+/// # Example
+///
+/// ```
+/// use snob::Scanner;
+///
+/// let mut scanner = Scanner::new("Hello, world!");
+/// if let Some(position) = scanner.starts_with("Hello") {
+/// scanner.goto(position);
+/// }
+/// ```
#[derive(Debug, Clone)]
pub struct Scanner {
source: Box<[char]>,
@@ -7,6 +20,15 @@ pub struct Scanner {
}
impl Scanner {
+ /// Create a new Scanner with a given source.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// let scanner = Scanner::new("Hello, world!");
+ /// ```
pub fn new(source: impl AsRef<str>) -> Self {
Self {
source: source.as_ref().chars().collect(),
@@ -14,30 +36,117 @@ impl Scanner {
}
}
+ /// Get the full source being used in this scanner, as a slice of characters
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// let scanner = Scanner::new("Hello, world!");
+ /// let source = scanner.source().iter().collect::<String>();
+ /// assert_eq!(source, "Hello, world!");
+ /// ```
pub fn source(&self) -> &[char] {
&self.source
}
+ /// Get the full length of the source being used in this scanner
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// let scanner = Scanner::new("Hello, world!");
+ /// assert_eq!(scanner.len(), 13);
+ /// ```
pub fn len(&self) -> usize {
self.source.len()
}
+ /// Returns `true` if the scanner's source is an empty string
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// let scanner = Scanner::new("Hello, world!");
+ /// assert!(!scanner.is_empty());
+ /// ```
pub fn is_empty(&self) -> bool {
self.len() == 0
}
+ /// Get the character at a given position in the string.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// let scanner = Scanner::new("Hello, world!");
+ /// assert_eq!(scanner.char_at(4), Some('o'));
+ /// ```
pub fn char_at(&self, index: usize) -> Option<char> {
self.source.get(index).cloned()
}
+ /// Get the current position in the string. When the [`Scanner`] is
+ /// created, this value is zero.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// let mut scanner = Scanner::new("Hello, world!");
+ /// assert_eq!(scanner.position(), 0);
+ /// scanner.advance(5);
+ /// assert_eq!(scanner.position(), 5);
+ /// scanner.goto(3);
+ /// assert_eq!(scanner.position(), 3);
+ /// ```
pub fn position(&self) -> usize {
self.position
}
+ /// Returns true if the scanner's position has reached the end of its
+ /// source.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// let mut scanner = Scanner::new("Hello, world!");
+ /// assert!(!scanner.is_at_end());
+ ///
+ /// if let Some(position) = scanner.starts_with("Hello, world!") {
+ /// scanner.goto(position);
+ /// }
+ ///
+ /// assert!(scanner.is_at_end());
+ /// ```
pub fn is_at_end(&self) -> bool {
self.position == self.source.len()
}
+ /// Set the scanner's `position`. If the position is out of range of the
+ /// source, then `None` is returned. Otherwise, the subslice from the old
+ /// position to the new position is returned. If the latter is less than
+ /// the former, then the string is reversed.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// let mut scanner = Scanner::new("Hello, world!");
+ /// scanner.goto(3);
+ /// assert_eq!(scanner.position(), 3);
+ /// ```
pub fn goto(&mut self, position: usize) -> Option<String> {
// allow reverse ranges
let production = if self.position < position {
@@ -54,11 +163,40 @@ impl Scanner {
Some(production)
}
+ /// Increase the position by the given `amount`. If the new position is out
+ /// of the range of the source, then `None` is returned. Otherwise, the
+ /// subslice from the old position to the new position is returned. If the
+ /// latter is less than the former, then the string is reversed.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// let mut scanner = Scanner::new("Hello, world!");
+ /// scanner.advance(5);
+ /// assert_eq!(scanner.position(), 5);
+ /// ```
pub fn advance(&mut self, amount: isize) -> Option<String> {
let position = self.position.checked_add_signed(amount)?;
self.goto(position)
}
+ /// Looks for the given `substring` in the remainder of the scanner. If the
+ /// substring is found, the position of the first character in the
+ /// substring is returned. Otherwise, `None` is returned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// # fn foo() -> Option<()> {
+ /// let scanner = Scanner::new("Hello, world!");
+ /// let position = scanner.find_substring("lo")?;
+ /// assert_eq!(position, 3);
+ /// # Some(())
+ /// # }
+ /// ```
pub fn find_substring(&self, substring: impl AsRef<str>) -> Option<usize> {
self.source
.get(self.position..)?
@@ -67,6 +205,21 @@ impl Scanner {
.find(substring.as_ref())
}
+ /// If `source[position..]` starts with the given string, then this returns
+ /// the ending position of the substring. Otherwise, `None` is returned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// # fn foo() -> Option<()> {
+ /// let scanner = Scanner::new("Hello, world!");
+ /// let position = scanner.starts_with("Hello")?;
+ /// assert_eq!(position, 5);
+ /// # Some(())
+ /// # }
+ /// ```
pub fn starts_with(&self, substring: impl AsRef<str>) -> Option<usize> {
let mut i = self.position;
for substring_char in substring.as_ref().chars() {
@@ -79,16 +232,66 @@ impl Scanner {
Some(i)
}
+ /// If `source[position..]` starts with the given string, then this returns
+ /// a copy of the substring. Otherwise, `None` is returned. This is the
+ /// equivalent of: `self.goto(self.starts_with(substring)?)`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// # fn foo() -> Option<()> {
+ /// let mut scanner = Scanner::new("Hello, world!");
+ /// let substring = scanner.advance_if_starts_with("Hello")?;
+ /// assert_eq!(substring, "Hello");
+ /// assert_eq!(scanner.position(), 5);
+ /// # Some(())
+ /// # }
+ /// ```
pub fn advance_if_starts_with(&mut self, substring: impl AsRef<str>) -> Option<String> {
let position = self.starts_with(substring)?;
self.goto(position)
}
+ /// If the next character in the scanner is contained in the given `cset`,
+ /// then the position after the next character is returned. Otherwise,
+ /// `None` is returned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// # fn foo() -> Option<()> {
+ /// let scanner = Scanner::new("Hello, world!");
+ /// let position = scanner.any('H')?;
+ /// assert_eq!(position, 1);
+ /// # Some(())
+ /// # }
+ /// ```
pub fn any(&self, cset: impl CharacterSet) -> Option<usize> {
cset.contains(*self.source.get(self.position)?)
.then_some(self.position + 1)
}
+ /// If the next character in the scanner is contained in the given `cset`,
+ /// then the position after the longest initial sequence of characters in
+ /// `cset` is returned. Otherwise, `None` is returned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ /// use snob::csets::AsciiLetters;
+ ///
+ /// # fn foo() -> Option<()> {
+ /// let scanner = Scanner::new("Hello, world!");
+ /// let position = scanner.many(AsciiLetters)?;
+ /// assert_eq!(position, 5);
+ /// # Some(())
+ /// # }
+ /// ```
pub fn many(&self, cset: impl CharacterSet) -> Option<usize> {
if !cset.contains(*self.source.get(self.position)?) {
return None;
@@ -102,6 +305,22 @@ impl Scanner {
Some(i)
}
+ /// If the remainder of the scanner contains a character from the given
+ /// `cset`, then the position of the aforementioned character is returned.
+ /// Otherwise, `None` is returned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use snob::Scanner;
+ ///
+ /// # fn foo() -> Option<()> {
+ /// let scanner = Scanner::new("Hello, world!");
+ /// let position = scanner.upto(' ')?;
+ /// assert_eq!(position, 6);
+ /// # Some(())
+ /// # }
+ /// ```
pub fn upto(&self, cset: impl CharacterSet) -> Option<usize> {
let mut i = self.position;
while !cset.contains(*self.source.get(i)?) {