Created 2 modules: parsley-grammar and parsley-examples

This commit is contained in:
Alberto Venturini 2021-07-02 13:12:06 +02:00
parent 6be7f76372
commit c9c1454f90
29 changed files with 278032 additions and 57 deletions

82
parsley-grammar/pom.xml Normal file
View file

@ -0,0 +1,82 @@
<!-- Maven build descriptor for the parsley-grammar module; it is packaged as a jar and inherits from the parsley parent POM. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>parsley-grammar</artifactId>
<packaging>jar</packaging>
<version>0.1.0-SNAPSHOT</version>
<name>parsley-grammar</name>
<!-- The groupId is not declared on this module; only the parent block carries one. -->
<parent>
<groupId>com.albertoventurini.parsley</groupId>
<artifactId>parsley</artifactId>
<version>0.1.0-SNAPSHOT</version>
</parent>
<url>https://github.com/albertoventurini/parsley</url>
<!-- NOTE(review): antlr.version, junit.version, maven-jar-plugin.version and maven-shade.plugin.version
     are not referenced anywhere in this file; presumably leftovers or consumed elsewhere. TODO confirm. -->
<properties>
<antlr.version>4.7.1</antlr.version>
<junit.version>5.6.1</junit.version>
<maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version>
<maven-surefire-plugin.version>3.0.0-M5</maven-surefire-plugin.version>
<maven-jar-plugin.version>3.2.0</maven-jar-plugin.version>
<maven-shade.plugin.version>3.2.2</maven-shade.plugin.version>
<!-- Referenced below as the Main-Class entry of the jar manifest. -->
<mainClass>com.albertoventurini.parsley.Main</mainClass>
</properties>
<!-- Test-scoped JUnit Jupiter dependencies. No versions are declared here;
     presumably they are managed by the parent POM. TODO confirm. -->
<dependencies>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiles the module for Java 11 (source, target and release are all set to 11). -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven-compiler-plugin.version}</version>
<configuration>
<source>11</source>
<target>11</target>
<release>11</release>
</configuration>
</plugin>
<!-- Runs the tests with an extra JVM argument. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>${maven-surefire-plugin.version}</version>
<configuration>
<!-- NOTE(review): enable-preview is passed to the test JVM although the compiler targets release 11;
     verify that this flag is still needed. -->
<argLine>--enable-preview</argLine>
</configuration>
</plugin>
<!-- Adds a Class-Path entry and the Main-Class (from the mainClass property) to the jar manifest.
     No plugin version or groupId is declared here, unlike the two plugins above. -->
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>${mainClass}</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,89 @@
package com.albertoventurini.parsley.grammar;
import com.albertoventurini.parsley.grammar.rules.AnyCharacter;
import com.albertoventurini.parsley.grammar.rules.MatchCharacter;
import com.albertoventurini.parsley.grammar.rules.MatchString;
import com.albertoventurini.parsley.grammar.rules.OneOf;
import com.albertoventurini.parsley.grammar.rules.OneOrMore;
import com.albertoventurini.parsley.grammar.rules.Rule;
import com.albertoventurini.parsley.grammar.rules.Sequence;
import com.albertoventurini.parsley.grammar.rules.TakeWhileCharacter;
import com.albertoventurini.parsley.grammar.rules.UntilString;
import com.albertoventurini.parsley.grammar.rules.Wrapper;
import com.albertoventurini.parsley.grammar.rules.ZeroOrMore;
import com.albertoventurini.parsley.grammar.rules.ZeroOrOne;
import java.util.Optional;
import java.util.function.Predicate;
/**
 * A grammar made of a start rule and an optional comment rule, plus static
 * factory methods for building the individual rules.
 */
public class Grammar {

    private final Rule startRule;

    private final Rule commentRule;

    /**
     * Creates a grammar.
     *
     * @param startRule   the rule applied to the text by {@link #parse(String)}
     * @param commentRule the rule handed to the parse context for comments;
     *                    may be {@code null}
     */
    public Grammar(
            final Rule startRule,
            final Rule commentRule) {
        this.startRule = startRule;
        this.commentRule = commentRule;

        // TODO: better than this
        // The comment rule, when supplied, is flagged as such as a side effect.
        Optional.ofNullable(commentRule).ifPresent(rule -> rule.setComment(true));
    }

    /**
     * Applies the start rule to the given text using a fresh context.
     *
     * @param text the text to parse
     * @return whatever the start rule returns for this text
     */
    public Optional<ParseTree> parse(final String text) {
        return startRule.apply(new GrammarContext(text, commentRule));
    }

    /** Creates a {@link MatchCharacter} rule for the given character. */
    public static MatchCharacter character(final char c) {
        return new MatchCharacter(c);
    }

    /** Creates an {@link AnyCharacter} rule. */
    public static AnyCharacter anyChar() {
        return new AnyCharacter();
    }

    /** Creates a {@link MatchString} rule for the given string. */
    public static MatchString string(final String s) {
        return new MatchString(s);
    }

    /** Creates a {@link TakeWhileCharacter} rule with its default predicate. */
    public static TakeWhileCharacter token() {
        return new TakeWhileCharacter();
    }

    /** Creates a {@link TakeWhileCharacter} rule driven by the given predicate. */
    public static TakeWhileCharacter takeWhile(final Predicate<Character> characterPredicate) {
        return new TakeWhileCharacter(characterPredicate);
    }

    /** Creates an {@link UntilString} rule for the given string. */
    public static UntilString until(final String s) {
        return new UntilString(s);
    }

    /** Creates a {@link Sequence} of the given rules. */
    public static Sequence sequence(final Rule... rules) {
        return new Sequence(rules);
    }

    /** Creates a {@link OneOf} rule over the given alternatives. */
    public static OneOf oneOf(final Rule... rules) {
        return new OneOf(rules);
    }

    /** Creates a {@link ZeroOrMore} rule around the given child rule. */
    public static ZeroOrMore zeroOrMore(final Rule childRule) {
        return new ZeroOrMore(childRule);
    }

    /** Creates a {@link ZeroOrOne} rule around the given child rule. */
    public static ZeroOrOne zeroOrOne(final Rule childRule) {
        return new ZeroOrOne(childRule);
    }

    /** Creates a {@link OneOrMore} rule around the given child rule. */
    public static OneOrMore oneOrMore(final Rule childRule) {
        return new OneOrMore(childRule);
    }

    /** Creates an empty {@link Wrapper}; its child rule is set later. */
    public static Wrapper wrapper() {
        return new Wrapper();
    }
}

View file

@ -0,0 +1,47 @@
package com.albertoventurini.parsley.grammar;
import com.albertoventurini.parsley.grammar.rules.Rule;
/**
 * A {@link ParseContext} that can skip whitespace and comments between
 * tokens. Comments are recognised by applying an optional comment rule.
 */
public class GrammarContext extends ParseContext {

    /** Rule used to recognise a comment; {@code null} when there is none. */
    private final Rule commentRule;

    /** True while a comment rule is being applied to this context. */
    private boolean inComment;

    /**
     * @param string      the text to parse
     * @param commentRule the rule that recognises a comment, or {@code null}
     */
    GrammarContext(
            final String string,
            final Rule commentRule) {
        super(string);
        this.commentRule = commentRule;
    }

    /**
     * Moves the cursor past whitespace and, unless a comment is currently
     * being parsed, past every comment that follows.
     *
     * <p>Comments and whitespace are skipped repeatedly until the cursor stops
     * advancing, so several consecutive comments are all discarded rather than
     * only the first one.
     */
    public void advanceToNextToken() {
        discardWhitespaces();
        if (inComment || commentRule == null) {
            return;
        }
        int before;
        do {
            before = getCursor();
            discardComments();
            discardWhitespaces();
        } while (getCursor() > before); // stop as soon as nothing more was consumed
    }

    /** @return whether a comment rule is currently being applied */
    public boolean isInComment() {
        return inComment;
    }

    /** @param inComment whether a comment rule is currently being applied */
    public void setInComment(final boolean inComment) {
        this.inComment = inComment;
    }

    /** Applies the comment rule once at the cursor, ignoring its result. */
    private void discardComments() {
        if (hasNext() && commentRule != null) {
            commentRule.apply(this);
        }
    }

    /** Advances the cursor over a run of whitespace characters (including newlines). */
    private void discardWhitespaces() {
        while (hasNext() && Character.isWhitespace(peek())) {
            advance();
        }
    }
}

View file

@ -0,0 +1,78 @@
package com.albertoventurini.parsley.grammar;
import java.util.Arrays;
public class ParseContext {
private final char[] charArr;
private int cursor;
public ParseContext(final String string) {
charArr = string.toCharArray();
}
public char next() {
char c = peek();
cursor++;
return c;
}
public char peek() {
if (cursor >= charArr.length) {
throw new ParseException("No more characters to parse");
}
return charArr[cursor];
}
public boolean hasNext() {
return cursor < charArr.length;
}
public int getCursor() {
return cursor;
}
public void advance() {
advance(1);
}
public void advance(final int i) {
cursor += i;
}
public void setCursor(final int cursor) {
this.cursor = cursor;
}
public void consume(final char c) {
final char n = next();
if (n != c) {
throw new ParseException("Expected character " + c + ", instead found " + n);
}
}
public String substring(final int start, final int end) {
return String.valueOf(Arrays.copyOfRange(charArr, start, end));
}
public String substring(final int start) {
return substring(start, cursor);
}
public String substring() {
return String.valueOf(Arrays.copyOfRange(charArr, cursor, charArr.length));
}
public boolean matches(final String s) {
if (cursor + s.length() > charArr.length) {
return false;
}
for (int i = 0; i < s.length(); i++) {
if (charArr[cursor + i] != s.charAt(i)) {
return false;
}
}
return true;
}
}

View file

@ -0,0 +1,8 @@
package com.albertoventurini.parsley.grammar;
/**
 * Unchecked exception carrying a parsing error message.
 */
public class ParseException extends RuntimeException {

    /**
     * @param message the description of the parsing problem
     */
    public ParseException(final String message) {
        super(message);
    }
}

View file

@ -0,0 +1,75 @@
package com.albertoventurini.parsley.grammar;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
public class ParseTree {
private final String text;
private final List<ParseTree> children;
public static final class Leaf extends ParseTree {
public Leaf(final String text) {
super(text, Collections.emptyList());
}
}
public static final class Node extends ParseTree {
public Node(final String text, final List<ParseTree> children) {
super(text, children);
}
}
public ParseTree(final String text, final List<ParseTree> children) {
this.text = text;
this.children = children;
}
public static Leaf leaf(final String text) {
return new Leaf(text);
}
public static Node node(final String text, final List<ParseTree> children) {
return new Node(text, children);
}
public String getText() {
return text;
}
public List<ParseTree> getChildren() {
return children;
}
public ParseTree child(final int i) {
return children.get(i);
}
public Optional<ParseTree> getFirstDescendantByName(final String name) {
if (name.equals(text)) {
return Optional.of(this);
}
for (final ParseTree child : children) {
final Optional<ParseTree> descendant = child.getFirstDescendantByName(name);
if (descendant.isPresent()) {
return descendant;
}
}
return Optional.empty();
}
public ParseTree child(final String name) {
return children.stream().filter(c -> name.equals(c.text))
.findFirst()
.orElseThrow(() -> new RuntimeException("Child not found: " + name));
}
@Override
public String toString() {
return text;
}
}

View file

@ -0,0 +1,18 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Optional;
/**
 * Rule that consumes exactly one character, whatever it is. It does not skip
 * whitespace or comments first.
 */
public final class AnyCharacter extends Rule {

    /**
     * Consumes the character at the cursor.
     *
     * @return a leaf holding the consumed character, or an empty optional when
     *         the input is exhausted (so that the rule fails like the other
     *         rules instead of throwing)
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        if (!ctx.hasNext()) {
            return Optional.empty();
        }
        return Optional.of(ParseTree.leaf(Character.toString(ctx.next())));
    }

    @Override
    public String toString() {
        return "AnyCharacter{}";
    }
}

View file

@ -0,0 +1,34 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Optional;
/**
 * Rule that matches one specific character after skipping to the next token.
 * The rule is marked as discarded on construction.
 */
public final class MatchCharacter extends Rule {

    /** The character this rule expects. */
    private final char c;

    /** @param c the character to match */
    public MatchCharacter(final char c) {
        this.c = c;
        discard = true;
    }

    /**
     * Skips to the next token and consumes the expected character if it is
     * there.
     *
     * @return a leaf holding the character, or an empty optional when the next
     *         character differs or the input is exhausted
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        ctx.advanceToNextToken();
        // Skipping whitespace may reach the end of the input; peek() would throw there.
        if (ctx.hasNext() && ctx.peek() == c) {
            return Optional.of(ParseTree.leaf(Character.toString(ctx.next())));
        }
        return Optional.empty();
    }

    @Override
    public String toString() {
        return "Character{" +
                "c=" + c +
                '}';
    }
}

View file

@ -0,0 +1,35 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Optional;
/**
 * Rule that matches a fixed string after skipping to the next token. The rule
 * is marked as discarded on construction.
 */
public final class MatchString extends Rule {

    /** The string this rule expects. */
    private final String s;

    /** @param s the string to match */
    public MatchString(final String s) {
        this.s = s;
        discard = true;
    }

    /**
     * Skips to the next token and consumes the expected string if the text at
     * the cursor starts with it.
     *
     * @return a leaf holding the string, or an empty optional on a mismatch
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        ctx.advanceToNextToken();

        if (!ctx.matches(s)) {
            return Optional.empty();
        }

        ctx.advance(s.length());
        return Optional.of(ParseTree.leaf(s));
    }

    @Override
    public String toString() {
        return "MatchString{s='" + s + "'}";
    }
}

View file

@ -0,0 +1,37 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Arrays;
import java.util.Optional;
/**
 * Ordered choice: tries each alternative in turn and returns the result of the
 * first one that succeeds.
 */
public final class OneOf extends Rule {

    /** The alternatives, in the order they are tried. */
    private final Rule[] rules;

    /** @param rules the alternatives, in the order they should be tried */
    public OneOf(final Rule... rules) {
        this.rules = rules;
    }

    /**
     * Applies the alternatives in order, each one starting from the position
     * the cursor had when this rule was entered.
     *
     * @return the result of the first alternative that matches, or an empty
     *         optional (with the cursor restored) when none does
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        final int start = ctx.getCursor();

        for (final Rule rule : rules) {
            final Optional<ParseTree> result = rule.apply(ctx);
            if (result.isPresent()) {
                return result;
            }
            // A failed alternative may have moved the cursor (e.g. past whitespace);
            // rewind so the next alternative sees the same input.
            ctx.setCursor(start);
        }

        return Optional.empty();
    }

    @Override
    public String toString() {
        return "OneOf{" +
                "rules=" + Arrays.toString(rules) +
                '}';
    }
}

View file

@ -0,0 +1,46 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
/**
 * Repetition: applies the child rule one or more times and collects the
 * results as children of a single node.
 */
public class OneOrMore extends Rule {

    /** The rule that is repeated. */
    private final Rule childRule;

    /** @param childRule the rule to repeat */
    public OneOrMore(final Rule childRule) {
        this.childRule = childRule;
    }

    /**
     * Applies the child rule until it fails, the input is exhausted, or it
     * matches without consuming any input.
     *
     * @return a node named after this rule holding every match, or an empty
     *         optional when the child rule does not match even once
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        final List<ParseTree> children = new ArrayList<>();

        final Optional<ParseTree> firstChild = childRule.apply(ctx);
        if (firstChild.isEmpty()) {
            return Optional.empty();
        }
        children.add(firstChild.get());

        while (ctx.hasNext()) {
            final int before = ctx.getCursor();
            final Optional<ParseTree> child = childRule.apply(ctx);
            if (child.isEmpty()) {
                break;
            }
            // A match that consumed nothing would repeat forever; stop instead.
            if (ctx.getCursor() <= before) {
                break;
            }
            children.add(child.get());
        }

        return Optional.of(ParseTree.node(name, children));
    }

    @Override
    public String toString() {
        return "OneOrMore{" +
                childRule +
                '}';
    }
}

View file

@ -0,0 +1,48 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Optional;
/**
 * Base class of all grammar rules. A rule can be given a name, flagged as
 * discarded, and flagged as a comment rule.
 */
public abstract class Rule {

    /** Name assigned through {@link #as(String)}; unset by default. */
    protected String name;

    /** Whether this rule is a comment rule. */
    public boolean isComment = false;

    /** Whether this rule is flagged as discarded. */
    protected boolean discard = false;

    /**
     * Names this rule.
     *
     * @return this rule, for chaining
     */
    public Rule as(final String name) {
        this.name = name;
        return this;
    }

    /**
     * Flags this rule as discarded.
     *
     * @return this rule, for chaining
     */
    public Rule discard() {
        this.discard = true;
        return this;
    }

    /**
     * Sets the discard flag explicitly.
     *
     * @return this rule, for chaining
     */
    public Rule discard(final boolean discard) {
        this.discard = discard;
        return this;
    }

    /**
     * Runs {@link #tryApply(GrammarContext)}. While a comment rule runs, the
     * context is marked as being inside a comment; the previous marking is
     * restored afterwards, even if {@code tryApply} throws.
     *
     * @return the result of {@code tryApply}
     */
    public Optional<ParseTree> apply(final GrammarContext ctx) {
        final boolean wasInComment = ctx.isInComment();
        try {
            // Only ever switches the flag on; an enclosing comment stays in effect.
            ctx.setInComment(wasInComment || isComment);
            return tryApply(ctx);
        } finally {
            ctx.setInComment(wasInComment);
        }
    }

    /** The rule-specific matching logic, implemented by each subclass. */
    public abstract Optional<ParseTree> tryApply(final GrammarContext ctx);

    /** Sets whether this rule is a comment rule. */
    public void setComment(final boolean comment) {
        this.isComment = comment;
    }
}

View file

@ -0,0 +1,48 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
/**
 * Rule that applies a list of rules one after another; all of them must match.
 */
public final class Sequence extends Rule {

    /** The rules to apply, in order. */
    private final Rule[] rules;

    /** @param rules the rules to apply, in order */
    public Sequence(final Rule... rules) {
        this.rules = rules;
    }

    /**
     * Applies every rule in order. If the input is exhausted before a rule is
     * applied, or a rule fails, the cursor is put back where it was on entry.
     *
     * @return a node named after this sequence holding the results of the
     *         rules that are not flagged as discarded, or an empty optional on
     *         failure
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        final int rewindTo = ctx.getCursor();
        final List<ParseTree> collected = new ArrayList<>(rules.length);

        for (int i = 0; i < rules.length; i++) {
            final Rule current = rules[i];

            // A rule is never applied once the input is exhausted; that counts as a failure.
            final Optional<ParseTree> outcome =
                    ctx.hasNext() ? current.apply(ctx) : Optional.empty();

            if (outcome.isEmpty()) {
                ctx.setCursor(rewindTo);
                return Optional.empty();
            }
            if (!current.discard) {
                collected.add(outcome.get());
            }
        }

        return Optional.of(ParseTree.node(name, collected));
    }

    @Override
    public String toString() {
        return "Sequence{rules=" + Arrays.toString(rules) + '}';
    }
}

View file

@ -0,0 +1,48 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Optional;
import java.util.function.Predicate;
/**
 * Rule that skips to the next token and then consumes characters for as long
 * as a predicate accepts them.
 */
public final class TakeWhileCharacter extends Rule {

    /** Predicate used by the no-argument constructor: accepts any non-whitespace character. */
    private static final Predicate<Character> DEFAULT_PREDICATE = (c) -> !Character.isWhitespace(c);

    /** Decides whether a character belongs to the run being consumed. */
    private final Predicate<Character> characterPredicate;

    /** Creates a rule that consumes a run of non-whitespace characters. */
    public TakeWhileCharacter() {
        this(DEFAULT_PREDICATE);
    }

    /** @param characterPredicate accepts the characters that should be consumed */
    public TakeWhileCharacter(final Predicate<Character> characterPredicate) {
        this.characterPredicate = characterPredicate;
    }

    /**
     * Consumes the longest run of accepted characters at the next token.
     *
     * @return a leaf holding the consumed text, or an empty optional when not
     *         a single character was accepted
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        ctx.advanceToNextToken();

        final int begin = ctx.getCursor();
        while (ctx.hasNext() && characterPredicate.test(ctx.peek())) {
            ctx.advance();
        }

        final boolean consumedSomething = ctx.getCursor() > begin;
        return consumedSomething
                ? Optional.of(ParseTree.leaf(ctx.substring(begin)))
                : Optional.empty();
    }

    @Override
    public String toString() {
        return "Token{characterPredicate=" + characterPredicate + '}';
    }
}

View file

@ -0,0 +1,45 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Arrays;
import java.util.Optional;
/**
 * Rule that consumes text up to and including the first occurrence of a
 * terminator string, or up to the end of the input when the terminator never
 * occurs.
 */
public final class UntilString extends Rule {

    /** The terminator as a character array (used by {@link #toString()}). */
    private final char[] charArr;

    /** The terminator as a string (used for matching). */
    private final String terminator;

    /** @param s the terminator string */
    public UntilString(final String s) {
        this.terminator = s;
        this.charArr = s.toCharArray();
    }

    /**
     * Skips to the next token, then consumes characters until the terminator
     * has been consumed or the input ends.
     *
     * <p>The terminator is tested at every position, so occurrences that
     * overlap a partial match (such as {@code "-->"} inside {@code "--->"})
     * are found.
     *
     * @return a leaf holding everything consumed, terminator included; this
     *         rule never fails
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        ctx.advanceToNextToken();
        final int start = ctx.getCursor();

        while (ctx.hasNext()) {
            if (ctx.matches(terminator)) {
                ctx.advance(terminator.length());
                break;
            }
            ctx.advance();
        }

        return Optional.of(ParseTree.leaf(ctx.substring(start)));
    }

    @Override
    public String toString() {
        return "UntilString{" +
                "name='" + name + '\'' +
                ", charArr=" + Arrays.toString(charArr) +
                '}';
    }
}

View file

@ -0,0 +1,32 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Optional;
/**
 * Rule that delegates to a child rule which can be set after construction.
 */
public final class Wrapper extends Rule {

    /** The rule this wrapper delegates to; {@code null} until set. */
    private Rule childRule;

    /** @return the wrapped rule, or {@code null} when none has been set */
    public Rule getChildRule() {
        return childRule;
    }

    /** @param childRule the rule to delegate to */
    public void setChildRule(final Rule childRule) {
        this.childRule = childRule;
    }

    /**
     * Applies the wrapped rule.
     *
     * @return the wrapped rule's result, or an empty optional when no rule has
     *         been set
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        if (childRule != null) {
            return childRule.apply(ctx);
        } else {
            return Optional.empty();
        }
    }

    /**
     * Describes the wrapped rule by its class name only. The child's own
     * {@code toString()} is deliberately not called: the child may contain this
     * wrapper again, which would recurse without end, and the child may also
     * still be unset.
     */
    @Override
    public String toString() {
        final String child = childRule == null ? "null" : childRule.getClass().getSimpleName();
        return "Wrapper{" + child + "}";
    }
}

View file

@ -0,0 +1,39 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
/**
 * Repetition: applies the child rule zero or more times and collects the
 * results as children of a single node.
 */
public final class ZeroOrMore extends Rule {

    /** The rule that is repeated. */
    private final Rule childRule;

    /** @param childRule the rule to repeat */
    public ZeroOrMore(final Rule childRule) {
        this.childRule = childRule;
    }

    /**
     * Applies the child rule until it fails, the input is exhausted, or it
     * matches without consuming any input.
     *
     * @return a node named after this rule holding every match (possibly
     *         none); this rule never fails
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        final List<ParseTree> children = new ArrayList<>();

        while (ctx.hasNext()) {
            final int before = ctx.getCursor();
            final Optional<ParseTree> child = childRule.apply(ctx);
            if (child.isEmpty()) {
                break;
            }
            // A match that consumed nothing would repeat forever; stop instead.
            if (ctx.getCursor() <= before) {
                break;
            }
            children.add(child.get());
        }

        return Optional.of(ParseTree.node(name, children));
    }

    @Override
    public String toString() {
        return "ZeroOrMore{" +
                childRule +
                '}';
    }
}

View file

@ -0,0 +1,31 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Optional;
/**
 * Optional rule: yields the child rule's match when there is one and an empty
 * leaf otherwise.
 */
public final class ZeroOrOne extends Rule {

    /** The rule that may or may not match. */
    private final Rule childRule;

    /** @param childRule the optional rule */
    public ZeroOrOne(final Rule childRule) {
        this.childRule = childRule;
    }

    /**
     * Applies the child rule once.
     *
     * @return the child rule's result when it matched, otherwise a leaf with
     *         empty text; this rule never fails
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        return childRule.apply(ctx)
                .or(() -> Optional.of(ParseTree.leaf("")));
    }

    @Override
    public String toString() {
        return "ZeroOrOne{" + childRule + '}';
    }
}

View file

@ -0,0 +1,148 @@
package com.albertoventurini.parsley.grammar;
import com.albertoventurini.parsley.grammar.rules.Rule;
import org.junit.jupiter.api.Test;
import java.util.function.Function;
import static com.albertoventurini.parsley.grammar.Grammar.character;
import static com.albertoventurini.parsley.grammar.Grammar.oneOf;
import static com.albertoventurini.parsley.grammar.Grammar.sequence;
import static com.albertoventurini.parsley.grammar.Grammar.string;
import static com.albertoventurini.parsley.grammar.Grammar.takeWhile;
import static com.albertoventurini.parsley.grammar.Grammar.token;
import static com.albertoventurini.parsley.grammar.Grammar.zeroOrMore;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Tests that build small XML-like grammars and parse sample tags with them.
 */
public class GrammarTest {

    /** Parses the input, asserts that a tree was produced and returns it. */
    private static ParseTree parseOrFail(final Grammar grammar, final String input) {
        final var parsed = grammar.parse(input);
        assertTrue(parsed.isPresent());
        return parsed.get();
    }

    /**
     * Reads the text of one part of one attribute. The attribute list is the
     * second child of the tag tree (the zeroOrMore node).
     */
    private static String attributePart(
            final ParseTree tagTree,
            final int attributeIndex,
            final int partIndex) {
        return tagTree.child(1)        // the zeroOrMore rule
                .child(attributeIndex) // the selected attribute
                .child(partIndex)      // the selected element of the attribute sequence
                .getText();
    }

    /** Builds the rule for the end of a tag: either "/>" or ">". */
    private static Rule tagEnd() {
        return oneOf(string("/>"), character('>'));
    }

    /** Builds the rule for an opening tag with a fixed name and optional attributes. */
    private static Rule openingTag(final String tagName, final Rule attribute) {
        return sequence(
                character('<'),
                string(tagName),
                zeroOrMore(attribute),
                character('>'));
    }

    /** Builds the rule for a closing tag with a fixed name. */
    private static Rule closingTag(final String tagName) {
        return sequence(
                string("</"),
                string(tagName),
                string(">"));
    }

    @Test
    public void xmlTagWithoutAttributes() {
        final Rule tag = sequence(
                character('<'),
                takeWhile(Character::isLetterOrDigit),
                tagEnd());
        final Grammar grammar = new Grammar(tag, null);

        for (final String input : new String[] {"<tag1>", "<tag1 />"}) {
            assertEquals("tag1", parseOrFail(grammar, input).child(0).getText());
        }
    }

    @Test
    public void xmlTagWithOptionalAttributes() {
        final Rule token = takeWhile(Character::isLetterOrDigit);
        final Rule attribute = sequence(token, character('='), token);
        final Rule tag = sequence(
                character('<'),
                token,
                zeroOrMore(attribute),
                tagEnd());
        final Grammar grammar = new Grammar(tag, null);

        assertTrue(grammar.parse("<tag1>").isPresent());

        final ParseTree single = parseOrFail(grammar, "<tag1 id=123>");
        assertEquals("tag1", single.child(0).getText());
        assertEquals("id", attributePart(single, 0, 0));
        assertEquals("123", attributePart(single, 0, 1));

        final ParseTree multiple = parseOrFail(grammar, "<tag1 hello=world something=else/>");
        assertEquals("tag1", multiple.child(0).getText());
        assertEquals("hello", attributePart(multiple, 0, 0));
        assertEquals("world", attributePart(multiple, 0, 1));
        assertEquals("something", attributePart(multiple, 1, 0));
        assertEquals("else", attributePart(multiple, 1, 1));
    }

    @Test
    public void xmlTagWithAttributeSurroundedByQuotes() {
        final Rule quotedAttribute = sequence(
                takeWhile(c -> c != '='),
                character('='),
                character('\''),
                takeWhile(c -> c != '\''),
                character('\''));
        final Rule tag = sequence(
                character('<'),
                token(),
                zeroOrMore(quotedAttribute),
                tagEnd());
        final Grammar grammar = new Grammar(tag, null);

        final var parsed = grammar.parse("<graphml xmlns='http://graphml.graphdrawing.org/xmlns'>");
        assertTrue(parsed.isPresent());
    }

    @Test
    public void elementWithoutContent() {
        final Rule quotedAttribute = sequence(
                takeWhile(c -> c != '=' && c != '>'),
                character('='),
                character('\''),
                takeWhile(c -> c != '\''),
                character('\''));
        final Rule key = sequence(
                openingTag("key", quotedAttribute),
                closingTag("key"));
        final Grammar grammar = new Grammar(key, null);

        final var parsed = grammar.parse(
                "<key id='type' for='node' attr.name='type' attr.type='string'></key>");
        assertTrue(parsed.isPresent());
    }
}