Copyright	(c) Nicklas Botö 2020
License	GPL-3
Maintainer	git@nicbot.xyz
Stability	experimental
Safe Haskell	Trustworthy

Zozo

Contents

Language definition
Regex constructors
- Specialized constructors
Derivatives
Evaluation
Regex sets
- Set combinators
Orphan instances

Description

Library for constructing, exploring, and visualizing regular expressions and their NFA form. Focusing heavily on the Brzozowski derivative but also Thompson's construction (eventually).

Synopsis

data Regex
- = Nil
- | Eps
- | Sym Char
- | Union Regex Regex
- | Inter Regex Regex
- | Comp Regex
- | Conc Regex Regex
- | Star Regex
(<|>) :: Regex -> Regex -> Regex
(<^>) :: Regex -> Regex -> Regex
(***) :: Regex -> Regex -> Regex
neg :: Regex -> Regex
(\\) :: Regex -> Regex -> Regex
r :: String -> Regex
some :: Regex -> Regex
many :: Regex -> Regex
mayb :: Regex -> Regex
alphabet :: String -> Regex
unit :: Bool -> Regex
nu :: Regex -> Regex
derive :: String -> Regex -> Regex
(^-) :: String -> Regex -> Regex
rreduce :: String -> Regex -> Regex
eval :: Regex -> Set String
printMatching :: Regex -> IO ()
match :: String -> Regex -> Bool
(?) :: String -> Regex -> Bool
alpha :: Regex
numeric :: Regex
int :: Regex
alphaNum :: Regex
space :: Regex
spaces :: Regex
whiteSpace :: Regex
ascii :: Regex
between :: Regex -> Regex -> Regex
sepBy :: Regex -> Regex -> Regex
(?>) :: Regex -> Regex -> Regex
(<?) :: Regex -> Regex -> Regex

Language definition

data Regex Source #

A simple regex lang

Constructors

Nil	The empty set
Eps	The empty string, \( \epsilon \)
Sym Char	A symbol as a singleton set
Union Regex Regex	Union of language sets \[ \Lambda \vee \Gamma \hspace{10pt} \Lambda,\Gamma \subseteq \Sigma^* \]
Inter Regex Regex	Intercept of language sets \[ \Lambda \wedge \Gamma \hspace{10pt} \Lambda,\Gamma \subseteq \Sigma^* \]
Comp Regex	Complement of language sets \[ \neg \Gamma \hspace{10pt} \Gamma \subseteq \Sigma^* \]
Conc Regex Regex	All possible concatenations
Star Regex	Kleene star operator

Instances

Instances details

IsList Regex Source #	For use with `OverloadedLists`
Instance details Defined in Zozo Associated Types type Item Regex Methods fromList :: [Item Regex] -> Regex fromListN :: Int -> [Item Regex] -> Regex toList :: Regex -> [Item Regex]
Eq Regex Source #
Instance details Defined in Zozo Methods (==) :: Regex -> Regex -> Bool (/=) :: Regex -> Regex -> Bool
Show Regex Source #
Instance details Defined in Zozo Methods showsPrec :: Int -> Regex -> ShowS show :: Regex -> String showList :: [Regex] -> ShowS
IsString Regex Source #	For use with `OverloadedStrings`
Instance details Defined in Zozo Methods fromString :: String -> Regex
Semigroup Regex Source #
Instance details Defined in Zozo Methods (<>) :: Regex -> Regex -> Regex sconcat :: NonEmpty Regex -> Regex stimes :: Integral b => b -> Regex -> Regex
Monoid Regex Source #
Instance details Defined in Zozo Methods mempty :: Regex mappend :: Regex -> Regex -> Regex mconcat :: [Regex] -> Regex
type Item Regex Source #
Instance details Defined in Zozo type Item Regex = String

Regex constructors

(<|>) :: Regex -> Regex -> Regex infixl 6 Source #

Synonym for the Union constructor

(<^>) :: Regex -> Regex -> Regex infixl 6 Source #

Synonym for the Inter constructor

(***) :: Regex -> Regex -> Regex infixl 6 Source #

Synonym for the Conc constructor

neg :: Regex -> Regex Source #

Synonym for the Comp constructor

(\\) :: Regex -> Regex -> Regex infixl 6 Source #

Language set difference \[ \Lambda \backslash \Gamma \hspace{10pt} \Lambda,\Gamma \subseteq \Sigma^* \]

Specialized constructors

r :: String -> Regex Source #

Treat string as concatenation of symbol regexes

 r"foo" = Sym 'f' *** Sym 'o' *** Sym 'o'

some :: Regex -> Regex Source #

Synonym for Star constructor That is, zero or more.

 ^r*$

many :: Regex -> Regex Source #

One or more

 ^r+$

mayb :: Regex -> Regex Source #

Zero or one

 ^r?$

alphabet :: String -> Regex Source #

Fold string over language union

 numericChar = alphabet ['0'..'9']

unit :: Bool -> Regex Source #

\[ \mathbb{1}_x = \begin{cases} \epsilon \quad & x = \top \\ \varnothing \quad & \text{otherwise} \\ \end{cases} \]

nu :: Regex -> Regex Source #

Auxiliary function to derive where \[ \nu(R) = \begin{cases} \epsilon \quad & \epsilon \in R\\ \varnothing \quad & \text{otherwise} \\ \end{cases} \] Equivalent to

 unit . (Eps mem)

Derivatives

derive :: String -> Regex -> Regex Source #

Defining the (Brzozowski) derivative of a language \(L\) with respect to the string \(u \in \Sigma^*\) to be \[ u^{-1} L = \left\{ v : uv \in L \right\} \]

(^-) :: String -> Regex -> Regex infixl 5 Source #

Infix derive with regex symbol input

Evaluation

rreduce :: String -> Regex -> Regex Source #

Continue reducing a regex

eval :: Regex -> Set String Source #

Evaluate regex to a set of strings

printMatching :: Regex -> IO () Source #

Print all strings that match a regex. Note that this does not print the full list when the number of matches is infinite.

>>> printMatching $ some "a"
{"a"}*

and not

 
 a
 aa
 aaa
 ...

If you want to such matches, please use the derivative (^-) or the match function (?)

match :: String -> Regex -> Bool Source #

Check if regex matches. Essentially checking if the derivative with respect to a string contains the empty string, \(\epsilon\).

(?) :: String -> Regex -> Bool infixl 5 Source #

Infix synonym for match

Regex sets

alpha :: Regex Source #

Alphabetic characters

numeric :: Regex Source #

Numeric characters

int :: Regex Source #

Positive or negative integer

alphaNum :: Regex Source #

Alpha-numeric characters

space :: Regex Source #

Single space character

spaces :: Regex Source #

One or more spaces

whiteSpace :: Regex Source #

Any whitespace character

ascii :: Regex Source #

Any ASCII character

Set combinators

between :: Regex -> Regex -> Regex Source #

Regex inside delimiters

>>> "'a'" ? ascii `between` "'"
True

sepBy :: Regex -> Regex -> Regex Source #

One or more regexes seperated by another regex

 list :: Regex
 list = "[" <> int `sepBy` "," <> "]"

(?>) :: Regex -> Regex -> Regex Source #

Alternating concat

 "'" ?> "a" = "'a" <|> "a"

(<?) :: Regex -> Regex -> Regex Source #

Suffix variant of (?>)

Orphan instances

Show (Set String) Source #
Instance details Methods showsPrec :: Int -> Set String -> ShowS show :: Set String -> String showList :: [Set String] -> ShowS