Refactoring: make Grammar an abstract class

This commit is contained in:
Alberto Venturini 2021-07-03 15:42:28 +02:00
parent c9c1454f90
commit 301eaaf98d
18 changed files with 395 additions and 163 deletions

View file

@ -26,6 +26,12 @@
</properties> </properties>
<dependencies> <dependencies>
<dependency>
<groupId>com.albertoventurini.parsley</groupId>
<artifactId>parsley-grammar</artifactId>
<version>0.1.0-SNAPSHOT</version>
</dependency>
<dependency> <dependency>
<groupId>org.junit.jupiter</groupId> <groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId> <artifactId>junit-jupiter</artifactId>
@ -37,10 +43,7 @@
<artifactId>junit-jupiter-engine</artifactId> <artifactId>junit-jupiter-engine</artifactId>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency>
<groupId>com.albertoventurini.parsley</groupId>
<artifactId>parsley-grammar</artifactId>
</dependency>
</dependencies> </dependencies>
<build> <build>

View file

@ -2,81 +2,17 @@ package com.albertoventurini.parsley.examples.graphml;
import com.albertoventurini.parsley.grammar.Grammar; import com.albertoventurini.parsley.grammar.Grammar;
import com.albertoventurini.parsley.grammar.ParseTree; import com.albertoventurini.parsley.grammar.ParseTree;
import com.albertoventurini.parsley.grammar.rules.Rule;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static com.albertoventurini.parsley.grammar.Grammar.*;
public class GrammarGraphMLParser implements GraphMLParser { public class GrammarGraphMLParser implements GraphMLParser {
private final Grammar grammar; private final Grammar grammar = new GraphMLGrammar();
public GrammarGraphMLParser() {
final Rule comment = sequence(string("<!--"), until("-->")).as("comment");
final Rule comments = zeroOrMore(comment).as("comments");
final Rule xmlHeader = string("<?xml version='1.0' ?>").as("header");
final Rule attribute = sequence(
takeWhile(c -> c != '=' && c != '>'),
character('='),
character('\''),
takeWhile(c -> c != '\''),
character('\'')).as("attribute");
final Function<String, Rule> tagFunc = (s) -> sequence(
character('<'),
string(s).as("tagName"),
zeroOrMore(attribute).as("attributes"),
character('>')).as("tag:" + s);
final Function<String, Rule> closingTagFunc = (s) -> sequence(
string("</"),
string(s),
character('>')).as("closingTag:" + s);
final BiFunction<String, Rule, Rule> elementFunc = (tagName, content) -> sequence(
tagFunc.apply(tagName).as("tag:" + tagName),
content.as("content"),
closingTagFunc.apply(tagName).as("closingTag:" + tagName));
final Rule data = elementFunc.apply("data", takeWhile(c -> c != '<')).as("dataElem");
final Rule node = elementFunc.apply("node", zeroOrMore(data)).as("nodeElem");
final Rule edge = elementFunc.apply("edge", zeroOrMore(data)).as("edgeElem");
final Rule graph = sequence(
tagFunc.apply("graph"),
oneOrMore(node).as("nodes"),
oneOrMore(edge).as("edges"),
closingTagFunc.apply("graph")).as("graphElem");
final Rule key = sequence(tagFunc.apply("key"), closingTagFunc.apply("key"))
.as("keyElem");
final Rule graphml = sequence(
tagFunc.apply("graphml"),
oneOrMore(key).as("keys"),
graph,
closingTagFunc.apply("graphml")).as("graphmlElem");
final Rule graphmlFile = sequence(
xmlHeader,
comments.discard(),
graphml).as("graphmlFile");
grammar = new Grammar(graphmlFile, null);
}
@Override @Override
public GraphMLParseResult parse(final String filePath) { public GraphMLParseResult parse(final String filePath) {

View file

@ -0,0 +1,76 @@
package com.albertoventurini.parsley.examples.graphml;
import com.albertoventurini.parsley.grammar.Grammar;
import com.albertoventurini.parsley.grammar.rules.Rule;
import java.util.function.BiFunction;
import java.util.function.Function;
import static com.albertoventurini.parsley.grammar.rules.Rules.*;
/**
 * Grammar for the GraphML documents used by the examples: an XML header,
 * optional comments, and a {@code graphml} element containing {@code key}
 * elements followed by a single {@code graph} of {@code node} and
 * {@code edge} elements.
 *
 * <p>Rules are built once per instance, in declaration order; the helper
 * methods below are only invoked after the {@code attribute} rule they
 * depend on has been initialised.
 */
public class GraphMLGrammar extends Grammar {

    /** One comment: the literal {@code <!--} followed by an {@code until("-->")} rule. */
    private final Rule comment = sequence(string("<!--"), until("-->")).as("comment");

    /** Zero or more comments, flagged as comment material and exposed via {@link #commentRule()}. */
    private final Rule comments = zeroOrMore(comment).setComment(true).as("comments");

    /** The exact XML declaration expected at the top of the file. */
    private final Rule xmlHeader = string("<?xml version='1.0' ?>").as("header");

    /** A single-quoted attribute of the form {@code name='value'}. */
    private final Rule attribute = sequence(
            takeWhile(c -> c != '=' && c != '>'),
            character('='),
            character('\''),
            takeWhile(c -> c != '\''),
            character('\'')).as("attribute");

    private final Rule data = element("data", takeWhile(c -> c != '<')).as("dataElem");

    private final Rule node = element("node", zeroOrMore(data)).as("nodeElem");

    private final Rule edge = element("edge", zeroOrMore(data)).as("edgeElem");

    /** A graph: opening tag, one or more nodes, one or more edges, closing tag. */
    private final Rule graph = sequence(
            openingTag("graph"),
            oneOrMore(node).as("nodes"),
            oneOrMore(edge).as("edges"),
            closingTag("graph")).as("graphElem");

    /** A {@code key} element with no content between its tags. */
    private final Rule key = sequence(openingTag("key"), closingTag("key"))
            .as("keyElem");

    private final Rule graphml = sequence(
            openingTag("graphml"),
            oneOrMore(key).as("keys"),
            graph,
            closingTag("graphml")).as("graphmlElem");

    /** Top-level rule: header, (discarded) comments, then the graphml element. */
    private final Rule graphmlFile = sequence(
            xmlHeader,
            comments.discard(),
            graphml).as("graphmlFile");

    /**
     * Builds the rule for an opening tag {@code <name attr='v' ...>}.
     *
     * @param name the tag name to match literally
     * @return a new rule labelled {@code "tag:" + name}
     */
    private Rule openingTag(final String name) {
        return sequence(
                character('<'),
                string(name).as("tagName"),
                zeroOrMore(attribute).as("attributes"),
                character('>')).as("tag:" + name);
    }

    /**
     * Builds the rule for a closing tag {@code </name>}.
     *
     * @param name the tag name to match literally
     * @return a new rule labelled {@code "closingTag:" + name}
     */
    private Rule closingTag(final String name) {
        return sequence(
                string("</"),
                string(name),
                character('>')).as("closingTag:" + name);
    }

    /**
     * Builds the rule for a complete element: opening tag, content, closing tag.
     *
     * @param tagName the element's tag name
     * @param content the rule matching what sits between the tags; it is labelled {@code "content"}
     * @return a new, unlabelled sequence rule
     */
    private Rule element(final String tagName, final Rule content) {
        return sequence(
                openingTag(tagName).as("tag:" + tagName),
                content.as("content"),
                closingTag(tagName).as("closingTag:" + tagName));
    }

    @Override
    protected Rule startRule() {
        return graphmlFile;
    }

    @Override
    protected Rule commentRule() {
        return comments;
    }
}

View file

@ -0,0 +1,35 @@
package com.albertoventurini.parsley.examples.markdown;
import com.albertoventurini.parsley.grammar.Grammar;
import com.albertoventurini.parsley.grammar.rules.Rule;
import static com.albertoventurini.parsley.grammar.rules.Rules.*;
/**
 * A minimal, work-in-progress Markdown grammar: a document is zero or more
 * elements, each of which is either a {@code #} heading line or a paragraph
 * line introduced by a newline character.
 */
public class MarkdownGrammar extends Grammar {

    /** A heading: {@code '#'} followed by the rest of the line. */
    private final Rule heading = sequence(
            character('#'),
            restOfLine());

    /** A paragraph line: a newline character followed by the rest of the line. */
    private final Rule paragraphLine = sequence(
            character('\n'),
            restOfLine());

    private final Rule block = oneOf(heading, paragraphLine);

    private final Rule blocks = sequence(block);

    private final Rule markdownDocument = zeroOrMore(blocks);

    /**
     * Creates a fresh rule that takes characters while they are not {@code '\n'}.
     *
     * @return a new take-while rule; a separate instance is returned on every call
     */
    private static Rule restOfLine() {
        return takeWhile(c -> c != '\n');
    }

    @Override
    public Rule startRule() {
        return markdownDocument;
    }

    /**
     * This grammar defines no comment syntax.
     *
     * @return always {@code null}
     */
    @Override
    protected Rule commentRule() {
        return null;
    }
}

View file

@ -0,0 +1,4 @@
package com.albertoventurini.parsley.examples.markdown;
/**
 * Result type returned by {@code MarkdownParser.parse}.
 *
 * <p>Currently an empty placeholder: it declares no fields or methods.
 */
public class MarkdownParseResult {
}

View file

@ -0,0 +1,5 @@
package com.albertoventurini.parsley.examples.markdown;
/**
 * Contract for parsers that produce a {@link MarkdownParseResult}.
 */
public interface MarkdownParser {
/**
 * Parses the input identified by {@code filePath}.
 *
 * @param filePath presumably the path of the Markdown file to parse; how it is
 *                 resolved and read is up to the implementation (none is visible here)
 * @return the parse result
 */
MarkdownParseResult parse(String filePath);
}

View file

@ -1,6 +1,7 @@
package com.albertoventurini.parsley.examples.graphml; package com.albertoventurini.parsley.examples.graphml;
import com.albertoventurini.parsley.grammar.Grammar; import com.albertoventurini.parsley.grammar.Grammar;
import com.albertoventurini.parsley.grammar.GrammarFactory;
import com.albertoventurini.parsley.grammar.rules.Rule; import com.albertoventurini.parsley.grammar.rules.Rule;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -9,7 +10,7 @@ import java.nio.file.Path;
import java.util.function.BiFunction; import java.util.function.BiFunction;
import java.util.function.Function; import java.util.function.Function;
import static com.albertoventurini.parsley.grammar.Grammar.*; import static com.albertoventurini.parsley.grammar.rules.Rules.*;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
public class GraphMLGrammarTest { public class GraphMLGrammarTest {
@ -69,7 +70,7 @@ public class GraphMLGrammarTest {
final Rule graphmlFile = sequence(xmlHeader, graphml).as("graphmlFile"); final Rule graphmlFile = sequence(xmlHeader, graphml).as("graphmlFile");
grammar = new Grammar(graphmlFile, comments); grammar = GrammarFactory.newGrammar(graphmlFile, comments);
} }
@Test @Test

View file

@ -0,0 +1,13 @@
package com.albertoventurini.parsley.examples.graphml;
import com.albertoventurini.parsley.examples.markdown.MarkdownGrammar;
import org.junit.jupiter.api.Test;
/**
 * Smoke test for {@link MarkdownGrammar}.
 */
public class MarkdownGrammarTest {

    /**
     * Currently only checks that the grammar can be constructed without
     * throwing; no input is parsed and nothing is asserted yet.
     */
    @Test
    public void markdownGrammar_withH1AndParagraph_shouldParse() {
        final var grammar = new MarkdownGrammar();
    }
}

View file

@ -0,0 +1,155 @@
# An h1 header
Paragraphs are separated by a blank line.
2nd paragraph. *Italic*, **bold**, and `monospace`. Itemized lists
look like:
* this one
* that one
* the other one
Note that --- not considering the asterisk --- the actual text
content starts at 4-columns in.
> Block quotes are
> written like so.
>
> They can span multiple paragraphs,
> if you like.
Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., "it's all
in chapters 12--14"). Three dots ... will be converted to an ellipsis.
Unicode is supported. ☺
# An h2 header
Here's a numbered list:
1. first item
2. second item
3. third item
Note again how the actual text starts at 4 columns in (4 characters
from the left side). Here's a code sample:
# Let me re-iterate ...
for i in 1 .. 10 { do-something(i) }
As you probably guessed, indented 4 spaces. By the way, instead of
indenting the block, you can use delimited blocks, if you like:
~~~
define foobar() {
print "Welcome to flavor country!";
}
~~~
(which makes copying & pasting easier). You can optionally mark the
delimited block for Pandoc to syntax highlight it:
~~~python
import time
# Quick, count to ten!
for i in range(10):
# (but not *too* quick)
time.sleep(0.5)
print i
~~~
### An h3 header
Now a nested list:
1. First, get these ingredients:
* carrots
* celery
* lentils
2. Boil some water.
3. Dump everything in the pot and follow
this algorithm:
find wooden spoon
uncover pot
stir
cover pot
balance wooden spoon precariously on pot handle
wait 10 minutes
goto first step (or shut off burner when done)
Do not bump wooden spoon or it will fall.
Notice again how text always lines up on 4-space indents (including
that last line which continues item 3 above).
Here's a link to [a website](http://foo.bar), to a [local
doc](local-doc.html), and to a [section heading in the current
doc](#an-h2-header). Here's a footnote [^1].
[^1]: Footnote text goes here.
Tables can look like this:
size material color
---- ------------ ------------
9 leather brown
10 hemp canvas natural
11 glass transparent
Table: Shoes, their sizes, and what they're made of
(The above is the caption for the table.) Pandoc also supports
multi-line tables:
-------- -----------------------
keyword text
-------- -----------------------
red Sunsets, apples, and
other red or reddish
things.
green Leaves, grass, frogs
and other things it's
not easy being.
-------- -----------------------
A horizontal rule follows.
***
Here's a definition list:
apples
: Good for making applesauce.
oranges
: Citrus!
tomatoes
: There's no "e" in tomatoe.
Again, text is indented 4 spaces. (Put a blank line between each
term/definition pair to spread things out more.)
Here's a "line block":
| Line one
| Line too
| Line tree
and images can be specified like so:
![example image](example-image.jpg "An exemplary image")
Inline math equations go in like so: $\omega = d\phi / dt$. Display
math should get its own line and be put in in double-dollarsigns:
$$I = \int \rho R^{2} dV$$
And note that you can backslash-escape any punctuation characters
which you wish to be displayed literally, ex.: \`foo\`, \*bar\*, etc.

View file

@ -1,89 +1,18 @@
package com.albertoventurini.parsley.grammar; package com.albertoventurini.parsley.grammar;
import com.albertoventurini.parsley.grammar.rules.AnyCharacter;
import com.albertoventurini.parsley.grammar.rules.MatchCharacter;
import com.albertoventurini.parsley.grammar.rules.MatchString;
import com.albertoventurini.parsley.grammar.rules.OneOf;
import com.albertoventurini.parsley.grammar.rules.OneOrMore;
import com.albertoventurini.parsley.grammar.rules.Rule; import com.albertoventurini.parsley.grammar.rules.Rule;
import com.albertoventurini.parsley.grammar.rules.Sequence;
import com.albertoventurini.parsley.grammar.rules.TakeWhileCharacter;
import com.albertoventurini.parsley.grammar.rules.UntilString;
import com.albertoventurini.parsley.grammar.rules.Wrapper;
import com.albertoventurini.parsley.grammar.rules.ZeroOrMore;
import com.albertoventurini.parsley.grammar.rules.ZeroOrOne;
import java.util.Optional; import java.util.Optional;
import java.util.function.Predicate;
public class Grammar { public abstract class Grammar {
private final Rule startRule; protected abstract Rule startRule();
private final Rule commentRule; protected abstract Rule commentRule();
public Grammar(
final Rule startRule,
final Rule commentRule) {
this.startRule = startRule;
this.commentRule = commentRule;
// TODO: better than this
if (commentRule != null) {
commentRule.setComment(true);
}
}
public Optional<ParseTree> parse(final String text) { public Optional<ParseTree> parse(final String text) {
final var ctx = new GrammarContext(text, commentRule); final var ctx = new GrammarContext(text, commentRule());
return startRule.apply(ctx); return startRule().apply(ctx);
} }
public static MatchCharacter character(final char c) {
return new MatchCharacter(c);
}
public static AnyCharacter anyChar() {
return new AnyCharacter();
}
public static MatchString string(final String s) {
return new MatchString(s);
}
public static TakeWhileCharacter token() {
return new TakeWhileCharacter();
}
public static TakeWhileCharacter takeWhile(final Predicate<Character> characterPredicate) {
return new TakeWhileCharacter(characterPredicate);
}
public static UntilString until(final String s) {
return new UntilString(s);
}
public static Sequence sequence(final Rule... rules) {
return new Sequence(rules);
}
public static OneOf oneOf(final Rule... rules) {
return new OneOf(rules);
}
public static ZeroOrMore zeroOrMore(final Rule childRule) {
return new ZeroOrMore(childRule);
}
public static ZeroOrOne zeroOrOne(final Rule childRule) {
return new ZeroOrOne(childRule);
}
public static OneOrMore oneOrMore(final Rule childRule) {
return new OneOrMore(childRule);
}
public static Wrapper wrapper() {
return new Wrapper();
}
} }

View file

@ -0,0 +1,27 @@
package com.albertoventurini.parsley.grammar;
import com.albertoventurini.parsley.grammar.rules.Rule;
/**
 * Factory for ad-hoc {@link Grammar} instances built from rules that already
 * exist, as an alternative to writing a dedicated {@code Grammar} subclass.
 */
public class GrammarFactory {

    /**
     * Creates a grammar whose start and comment rules are the given instances.
     *
     * <p>Side effect: when {@code commentRule} is non-null it is flagged as a
     * comment rule via {@code setComment(true)} before the grammar is returned,
     * so the rule object passed in is mutated.
     *
     * @param startRule   the rule the returned grammar starts parsing from; must not be {@code null}
     * @param commentRule the rule describing comments, or {@code null} if the grammar has none
     * @return a new grammar backed by the two rules
     * @throws NullPointerException if {@code startRule} is {@code null}
     */
    public static Grammar newGrammar(final Rule startRule, final Rule commentRule) {
        // Fail fast: without this check a null start rule would only surface
        // later, as an unexplained NullPointerException inside Grammar.parse.
        java.util.Objects.requireNonNull(startRule, "startRule");

        // TODO: better than this
        if (commentRule != null) {
            commentRule.setComment(true);
        }

        return new Grammar() {
            @Override
            protected Rule startRule() {
                return startRule;
            }

            @Override
            protected Rule commentRule() {
                return commentRule;
            }
        };
    }
}

View file

@ -2,7 +2,7 @@ package com.albertoventurini.parsley.grammar;
import java.util.Arrays; import java.util.Arrays;
public class ParseContext { class ParseContext {
private final char[] charArr; private final char[] charArr;
private int cursor; private int cursor;

View file

@ -4,7 +4,7 @@ import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
public class ParseTree { public abstract class ParseTree {
private final String text; private final String text;

View file

@ -10,7 +10,6 @@ public final class MatchCharacter extends Rule {
public MatchCharacter(final char c) { public MatchCharacter(final char c) {
this.c = c; this.c = c;
discard = true; discard = true;
} }

View file

@ -42,7 +42,8 @@ public abstract class Rule {
public abstract Optional<ParseTree> tryApply(final GrammarContext ctx); public abstract Optional<ParseTree> tryApply(final GrammarContext ctx);
public void setComment(final boolean comment) { public Rule setComment(final boolean comment) {
isComment = comment; isComment = comment;
return this;
} }
} }

View file

@ -0,0 +1,54 @@
package com.albertoventurini.parsley.grammar.rules;
import java.util.function.Predicate;
/**
 * Static factory methods for the built-in {@link Rule} implementations.
 * Intended to be statically imported so grammars can be written as nested
 * calls such as {@code sequence(character('<'), token(), character('>'))}.
 */
public class Rules {

    // ---------------------------------------------------------------
    // Character- and string-level rules
    // ---------------------------------------------------------------

    /**
     * @param c the character the rule is constructed with
     * @return a new {@link MatchCharacter} for {@code c}
     */
    public static MatchCharacter character(final char c) {
        return new MatchCharacter(c);
    }

    /**
     * @return a new {@link AnyCharacter} rule
     */
    public static AnyCharacter anyChar() {
        return new AnyCharacter();
    }

    /**
     * @param s the string the rule is constructed with
     * @return a new {@link MatchString} for {@code s}
     */
    public static MatchString string(final String s) {
        return new MatchString(s);
    }

    /**
     * @return a new {@link TakeWhileCharacter} created with its no-argument
     *         constructor, i.e. using whatever default predicate that class defines
     */
    public static TakeWhileCharacter token() {
        return new TakeWhileCharacter();
    }

    /**
     * @param characterPredicate the predicate handed to the rule
     * @return a new {@link TakeWhileCharacter} driven by {@code characterPredicate}
     */
    public static TakeWhileCharacter takeWhile(final Predicate<Character> characterPredicate) {
        return new TakeWhileCharacter(characterPredicate);
    }

    /**
     * @param s the string the rule is constructed with
     * @return a new {@link UntilString} for {@code s}
     */
    public static UntilString until(final String s) {
        return new UntilString(s);
    }

    // ---------------------------------------------------------------
    // Combinators over other rules
    // ---------------------------------------------------------------

    /**
     * @param rules the child rules, in order
     * @return a new {@link Sequence} over {@code rules}
     */
    public static Sequence sequence(final Rule... rules) {
        return new Sequence(rules);
    }

    /**
     * @param rules the alternative child rules
     * @return a new {@link OneOf} over {@code rules}
     */
    public static OneOf oneOf(final Rule... rules) {
        return new OneOf(rules);
    }

    /**
     * @param childRule the rule to wrap
     * @return a new {@link ZeroOrOne} around {@code childRule}
     */
    public static ZeroOrOne zeroOrOne(final Rule childRule) {
        return new ZeroOrOne(childRule);
    }

    /**
     * @param childRule the rule to wrap
     * @return a new {@link ZeroOrMore} around {@code childRule}
     */
    public static ZeroOrMore zeroOrMore(final Rule childRule) {
        return new ZeroOrMore(childRule);
    }

    /**
     * @param childRule the rule to wrap
     * @return a new {@link OneOrMore} around {@code childRule}
     */
    public static OneOrMore oneOrMore(final Rule childRule) {
        return new OneOrMore(childRule);
    }

    /**
     * @return a new, empty {@link Wrapper} rule
     */
    public static Wrapper wrapper() {
        return new Wrapper();
    }
}

View file

@ -20,7 +20,7 @@ public final class Sequence extends Rule {
final int start = ctx.getCursor(); final int start = ctx.getCursor();
final List<ParseTree> children = new ArrayList<>(rules.length); final List<ParseTree> children = new ArrayList<>(rules.length);
for (final com.albertoventurini.parsley.grammar.rules.Rule rule : rules) { for (final Rule rule : rules) {
if (!ctx.hasNext()) { if (!ctx.hasNext()) {
ctx.setCursor(start); ctx.setCursor(start);
return Optional.empty(); return Optional.empty();

View file

@ -5,13 +5,7 @@ import org.junit.jupiter.api.Test;
import java.util.function.Function; import java.util.function.Function;
import static com.albertoventurini.parsley.grammar.Grammar.character; import static com.albertoventurini.parsley.grammar.rules.Rules.*;
import static com.albertoventurini.parsley.grammar.Grammar.oneOf;
import static com.albertoventurini.parsley.grammar.Grammar.sequence;
import static com.albertoventurini.parsley.grammar.Grammar.string;
import static com.albertoventurini.parsley.grammar.Grammar.takeWhile;
import static com.albertoventurini.parsley.grammar.Grammar.token;
import static com.albertoventurini.parsley.grammar.Grammar.zeroOrMore;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
@ -24,7 +18,7 @@ public class GrammarTest {
takeWhile(Character::isLetterOrDigit), takeWhile(Character::isLetterOrDigit),
oneOf(string("/>"), character('>'))); oneOf(string("/>"), character('>')));
final Grammar grammar = new Grammar(tag, null); final Grammar grammar = GrammarFactory.newGrammar(tag, null);
final var parseTree1 = grammar.parse("<tag1>"); final var parseTree1 = grammar.parse("<tag1>");
@ -49,7 +43,7 @@ public class GrammarTest {
zeroOrMore(attribute), zeroOrMore(attribute),
oneOf(string("/>"), character('>'))); oneOf(string("/>"), character('>')));
final Grammar grammar = new Grammar(tag, null); final Grammar grammar = GrammarFactory.newGrammar(tag, null);
assertTrue(grammar.parse("<tag1>").isPresent()); assertTrue(grammar.parse("<tag1>").isPresent());
@ -111,7 +105,7 @@ public class GrammarTest {
zeroOrMore(attribute), zeroOrMore(attribute),
oneOf(string("/>"), character('>'))); oneOf(string("/>"), character('>')));
final Grammar grammar = new Grammar(tag, null); final Grammar grammar = GrammarFactory.newGrammar(tag, null);
assertTrue(grammar.parse("<graphml xmlns='http://graphml.graphdrawing.org/xmlns'>").isPresent()); assertTrue(grammar.parse("<graphml xmlns='http://graphml.graphdrawing.org/xmlns'>").isPresent());
} }
@ -140,7 +134,7 @@ public class GrammarTest {
tagFunc.apply("key"), tagFunc.apply("key"),
closingTagFunc.apply("key")); closingTagFunc.apply("key"));
final Grammar grammar = new Grammar(key, null); final Grammar grammar = GrammarFactory.newGrammar(key, null);
assertTrue(grammar.parse("<key id='type' for='node' attr.name='type' attr.type='string'></key>").isPresent()); assertTrue(grammar.parse("<key id='type' for='node' attr.name='type' attr.type='string'></key>").isPresent());
} }