Support Markdown lists

This commit is contained in:
Alberto Venturini 2021-07-11 14:58:58 +02:00
parent 301eaaf98d
commit 90f84950d2
24 changed files with 307 additions and 88 deletions
parsley-examples/src
main/java/com/albertoventurini/parsley/examples
test/java/com/albertoventurini/parsley/examples
parsley-grammar
pom.xml

View file

@ -46,9 +46,9 @@ public class GrammarGraphMLParser implements GraphMLParser {
}
private Node processNodeElem(final ParseTree nodeElem) {
final String id = extractAttribute(nodeElem.child("tag:node"), "id");
final String id = extractAttribute(nodeElem.getChild("tag:node"), "id");
final Map<String, String> data = nodeElem.child("content").getChildren()
final Map<String, String> data = nodeElem.getChild("content").getChildren()
.stream()
.map(this::extractDataElemContent)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
@ -57,11 +57,11 @@ public class GrammarGraphMLParser implements GraphMLParser {
}
private Edge processEdgeElem(final ParseTree edgeElem) {
final String id = extractAttribute(edgeElem.child("tag:edge"), "id");
final String source = extractAttribute(edgeElem.child("tag:edge"), "source");
final String target = extractAttribute(edgeElem.child("tag:edge"), "target");
final String id = extractAttribute(edgeElem.getChild("tag:edge"), "id");
final String source = extractAttribute(edgeElem.getChild("tag:edge"), "source");
final String target = extractAttribute(edgeElem.getChild("tag:edge"), "target");
final Map<String, String> data = edgeElem.child("content").getChildren()
final Map<String, String> data = edgeElem.getChild("content").getChildren()
.stream()
.map(this::extractDataElemContent)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
@ -70,16 +70,16 @@ public class GrammarGraphMLParser implements GraphMLParser {
}
private Map.Entry<String, String> extractDataElemContent(final ParseTree dataElem) {
final var attribute = extractAttribute(dataElem.child("tag:data"), "key");
final var content = dataElem.child(1).getText();
final var attribute = extractAttribute(dataElem.getChild("tag:data"), "key");
final var content = dataElem.getChild(1).getText();
return Map.entry(attribute, content);
}
private String extractAttribute(final ParseTree tag, final String attributeKey) {
return tag.child("attributes").getChildren()
return tag.getChild("attributes").getChildren()
.stream()
.filter(a -> a.child(0).getText().equals(attributeKey))
.map(a -> a.child(1).getText())
.filter(a -> a.getChild(0).getText().equals(attributeKey))
.map(a -> a.getChild(1).getText())
.findFirst()
.orElseThrow();
}

View file

@ -7,21 +7,68 @@ import static com.albertoventurini.parsley.grammar.rules.Rules.*;
public class MarkdownGrammar extends Grammar {
private final char NEWLINE = '\n';
private final char BOLD_DELIMITER = '*';
private final char ITALIC_DELIMITER = '_';
private final Rule h1 = sequence(
character('#'),
takeWhile(c -> c != '\n')
);
takeWhile(c -> c != NEWLINE).as("text")
).as("h1");
private final Rule h2 = sequence(
string("##"),
takeWhile(c -> c != NEWLINE).as("text")
).as("h2");
private final Rule h3 = sequence(
string("###"),
takeWhile(c -> c != NEWLINE).as("text")
).as("h3");
private final Rule headers = sequence(oneOf(h3, h2, h1), zeroOrMore(character(NEWLINE)));
private final Rule plainText = takeWhile(c ->
c != NEWLINE
&& c != BOLD_DELIMITER
&& c != ITALIC_DELIMITER
).as("text");
private final Rule bold = sequence(
character(BOLD_DELIMITER),
takeWhile(c -> c != NEWLINE && c != BOLD_DELIMITER).as("bold"),
character(BOLD_DELIMITER)
).as("boldWrapper");
private final Rule italic = sequence(
character(ITALIC_DELIMITER),
takeWhile(c -> c != NEWLINE && c != ITALIC_DELIMITER).as("italic"),
character(ITALIC_DELIMITER)
).as("italicWrapper");
private final Rule text = oneOf(
bold,
italic,
plainText);
private final Rule paragraph = sequence(
character('\n'),
takeWhile(c -> c != '\n')
);
oneOrMore(text).as("paragraph"),
zeroOrMore(character(NEWLINE))
).as("paragraphWrapper");
private final Rule element = oneOf(h1, paragraph);
private final Rule listItem = sequence(
oneOf(string("* "), string("- ")),
takeWhile(c -> c != NEWLINE).as("listItem"),
character(NEWLINE)
).as("listItemWrapper");
private final Rule elements = sequence(element);
private final Rule list = sequence(oneOrMore(listItem).as("list"), zeroOrMore(character(NEWLINE)));
private final Rule document = zeroOrMore(elements);
private final Rule element = oneOf(headers, list, paragraph);
private final Rule document = zeroOrMore(element);
@Override
public Rule startRule() {
@ -32,4 +79,9 @@ public class MarkdownGrammar extends Grammar {
protected Rule commentRule() {
return null;
}
/**
 * Treats every whitespace character except the newline as skippable whitespace.
 * The rules of this grammar match the newline explicitly (headers, paragraphs and
 * list items are delimited by it), so it is excluded here.
 *
 * @param c the character to classify
 * @return {@code true} if {@code c} is whitespace other than the newline
 */
@Override
protected boolean whitespace(final char c) {
    // Use the shared constant rather than a second '\n' literal.
    return c != NEWLINE && Character.isWhitespace(c);
}
}

View file

@ -1,13 +0,0 @@
package com.albertoventurini.parsley.examples.graphml;
import com.albertoventurini.parsley.examples.markdown.MarkdownGrammar;
import org.junit.jupiter.api.Test;
public class MarkdownGrammarTest {
@Test
public void markdownGrammar_withH1AndParagraph_shouldParse() {
final MarkdownGrammar grammar = new MarkdownGrammar();
}
}

View file

@ -0,0 +1,73 @@
package com.albertoventurini.parsley.examples.markdown;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for {@link MarkdownGrammar}: headers, paragraphs, inline bold text and lists.
 * <p>
 * Every named descendant is asserted to be present before its value is read, so a
 * missing node shows up as an assertion failure rather than a NoSuchElementException.
 */
public class MarkdownGrammarTest {

    private final MarkdownGrammar grammar = new MarkdownGrammar();

    @Test
    public void shouldParseH1() {
        final var parseResult = grammar.parse("# hello world");
        assertTrue(parseResult.isPresent());

        final var root = parseResult.get();
        assertEquals("# hello world", root.getText());

        final var h1 = root.getFirstDescendantByName("h1");
        assertTrue(h1.isPresent());
        assertEquals("# hello world", h1.get().getText());

        final var text = root.getFirstDescendantByName("text");
        assertTrue(text.isPresent());
        assertEquals("hello world", text.get().getText());
    }

    @Test
    public void shouldParseH2() {
        final var parseResult = grammar.parse("## hello world");
        assertTrue(parseResult.isPresent());

        final var root = parseResult.get();
        assertEquals("## hello world", root.getText());

        final var h2 = root.getFirstDescendantByName("h2");
        assertTrue(h2.isPresent());
        assertEquals("## hello world", h2.get().getText());

        // The lookup returns an Optional, so presence (not non-nullness) is the meaningful check.
        final var text = root.getFirstDescendantByName("text");
        assertTrue(text.isPresent());
        assertEquals("hello world", text.get().getText());
    }

    @Test
    public void shouldParseParagraph() {
        final var parseResult = grammar.parse("hello world");
        assertTrue(parseResult.isPresent());

        final var root = parseResult.get();
        assertEquals("hello world", root.getText());

        final var paragraph = root.getFirstDescendantByName("paragraph");
        assertTrue(paragraph.isPresent());
        assertEquals("hello world", paragraph.get().getText());
    }

    @Test
    public void shouldParseParagraphWithBoldText() {
        final var parseResult = grammar.parse("hello world *bold text* normal text");
        assertTrue(parseResult.isPresent());

        final var root = parseResult.get();

        // Expected children: plain text, bold wrapper, plain text.
        final var paragraph = root.getFirstDescendantByName("paragraph");
        assertTrue(paragraph.isPresent());
        assertEquals(3, paragraph.get().getChildren().size());

        final var bold = root.getFirstDescendantByName("bold");
        assertTrue(bold.isPresent());
        assertEquals("bold text", bold.get().getText());
    }

    @Test
    public void shouldParseList() {
        final var parseResult = grammar.parse("hello world\n" +
                "\n" +
                "* first item\n" +
                "* second item\n");
        assertTrue(parseResult.isPresent());

        final var root = parseResult.get();

        final var paragraph = root.getFirstDescendantByName("paragraph");
        assertTrue(paragraph.isPresent());
        assertEquals("hello world", paragraph.get().getText());

        final var list = root.getFirstDescendantByName("list");
        assertTrue(list.isPresent());

        final var items = list.get().getChildren();
        assertEquals(2, items.size());

        final var firstItem = items.get(0).getFirstDescendantByName("listItem");
        assertTrue(firstItem.isPresent());
        assertEquals("first item", firstItem.get().getText());
    }
}

View file

@ -27,6 +27,11 @@
</properties>
<dependencies>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>

View file

@ -10,8 +10,12 @@ public abstract class Grammar {
protected abstract Rule commentRule();
/**
 * Default whitespace test; {@link #parse(String)} hands it to the parse context as the
 * whitespace predicate. Subclasses may override it to change which characters count
 * as whitespace.
 *
 * @param c the character to classify
 * @return the result of {@link Character#isWhitespace(char)} for {@code c}
 */
protected boolean whitespace(final char c) {
return Character.isWhitespace(c);
}
public Optional<ParseTree> parse(final String text) {
final var ctx = new GrammarContext(text, commentRule());
final var ctx = new GrammarContext(text, commentRule(), this::whitespace);
return startRule().apply(ctx);
}

View file

@ -2,15 +2,20 @@ package com.albertoventurini.parsley.grammar;
import com.albertoventurini.parsley.grammar.rules.Rule;
import java.util.function.Predicate;
public class GrammarContext extends ParseContext {
private final Rule commentRule;
private final Predicate<Character> whitespacePredicate;
private boolean inComment;
GrammarContext(
final String string,
final Rule commentRule) {
final Rule commentRule,
final Predicate<Character> whitespacePredicate) {
super(string);
this.commentRule = commentRule;
this.whitespacePredicate = whitespacePredicate;
}
public void advanceToNextToken() {
@ -21,6 +26,19 @@ public class GrammarContext extends ParseContext {
}
}
/**
 * Moves the cursor forward to the end of the current token.
 * <p>
 * Scanning stops at the end of the input, at the first character the whitespace
 * predicate accepts, or as soon as the comment rule matches at the cursor.
 */
public void advanceToEndOfToken() {
    while (hasNext()) {
        // Grammars may return null from commentRule(); skip comment detection then.
        // NOTE(review): a successful commentRule.apply(...) may already have moved the
        // cursor past the comment - confirm whether the cursor should be restored here.
        if (commentRule != null && commentRule.apply(this).isPresent()) {
            break;
        }

        if (isWhitespace(peek())) {
            break;
        }

        // Consume the token character; without this the loop never makes progress.
        advance();
    }
}
public boolean isInComment() {
return inComment;
}
@ -38,10 +56,16 @@ public class GrammarContext extends ParseContext {
private void discardWhitespaces() {
while (hasNext()) {
final char c = peek();
if (!(Character.isWhitespace(c) || c == '\n')) {
if (!isWhitespace(c)) {
break;
}
advance();
}
}
/**
 * Tells whether {@code c} counts as whitespace for this parse by delegating to the
 * whitespace predicate supplied to the constructor.
 */
private boolean isWhitespace(final char c) {
return whitespacePredicate.test(c);
}
}

View file

@ -1,38 +1,55 @@
package com.albertoventurini.parsley.grammar;
import javax.annotation.Nonnull;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
public abstract class ParseTree {
private final String name;
private final String text;
private final List<ParseTree> children;
public static final class Leaf extends ParseTree {
public Leaf(final String text) {
super(text, Collections.emptyList());
public Leaf(
@Nonnull final String name,
@Nonnull final String text) {
super(name, text, Collections.emptyList());
}
}
public static final class Node extends ParseTree {
public Node(final String text, final List<ParseTree> children) {
super(text, children);
public Node(
@Nonnull final String name,
@Nonnull final String text,
@Nonnull final List<ParseTree> children) {
super(name, text, children);
}
}
public ParseTree(final String text, final List<ParseTree> children) {
public ParseTree(
@Nonnull final String name,
@Nonnull final String text,
@Nonnull final List<ParseTree> children) {
this.name = name;
this.text = text;
this.children = children;
}
public static Leaf leaf(final String text) {
return new Leaf(text);
public static Leaf leaf(
@Nonnull final String name,
@Nonnull final String text) {
return new Leaf(name, text);
}
public static Node node(final String text, final List<ParseTree> children) {
return new Node(text, children);
public static Node node(
@Nonnull final String name,
@Nonnull final String text,
@Nonnull final List<ParseTree> children) {
return new Node(name, text, children);
}
public String getText() {
@ -43,12 +60,12 @@ public abstract class ParseTree {
return children;
}
public ParseTree child(final int i) {
public ParseTree getChild(final int i) {
return children.get(i);
}
public Optional<ParseTree> getFirstDescendantByName(final String name) {
if (name.equals(text)) {
public Optional<ParseTree> getFirstDescendantByName(@Nonnull final String name) {
if (name.equals(this.name)) {
return Optional.of(this);
}
@ -62,7 +79,7 @@ public abstract class ParseTree {
return Optional.empty();
}
public ParseTree child(final String name) {
public ParseTree getChild(@Nonnull final String name) {
return children.stream().filter(c -> name.equals(c.text))
.findFirst()
.orElseThrow(() -> new RuntimeException("Child not found: " + name));
@ -70,6 +87,9 @@ public abstract class ParseTree {
@Override
public String toString() {
return text;
return "ParseTree{" +
"name='" + name + '\'' +
", text='" + text + '\'' +
'}';
}
}

View file

@ -6,9 +6,14 @@ import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Optional;
public final class AnyCharacter extends Rule {
public AnyCharacter() {
this.name = "AnyCharacter";
}
@Override
public Optional<ParseTree> tryApply(final GrammarContext ctx) {
return Optional.of(ParseTree.leaf(Character.toString(ctx.next())));
return Optional.of(ParseTree.leaf(name, Character.toString(ctx.next())));
}
@Override

View file

@ -0,0 +1,27 @@
package com.albertoventurini.parsley.grammar.rules;
import com.albertoventurini.parsley.grammar.GrammarContext;
import com.albertoventurini.parsley.grammar.ParseTree;
import java.util.Optional;
/**
 * Rule that matches whatever token comes next in the input.
 * <p>
 * It first asks the context to move to the next token, then captures the text between
 * that position and the end of the token as a leaf named after this rule.
 */
public class AnyToken extends Rule {

    public AnyToken() {
        this.name = "AnyToken";
    }

    /**
     * @param ctx the grammar context to read from
     * @return a leaf holding the token text, or an empty Optional when no input is
     *         left after moving to the next token
     */
    @Override
    public Optional<ParseTree> tryApply(final GrammarContext ctx) {
        ctx.advanceToNextToken();

        if (ctx.hasNext()) {
            final int tokenStart = ctx.getCursor();
            ctx.advanceToEndOfToken();
            final String tokenText = ctx.substring(tokenStart);
            return Optional.of(ParseTree.leaf(name, tokenText));
        }

        return Optional.empty();
    }
}

View file

@ -9,6 +9,7 @@ public final class MatchCharacter extends Rule {
private final char c;
public MatchCharacter(final char c) {
this.name = "MatchCharacter";
this.c = c;
discard = true;
}
@ -17,8 +18,8 @@ public final class MatchCharacter extends Rule {
public Optional<ParseTree> tryApply(final GrammarContext ctx) {
ctx.advanceToNextToken();
if (ctx.peek() == c) {
return Optional.of(ParseTree.leaf(Character.toString(ctx.next())));
if (ctx.hasNext() && ctx.peek() == c) {
return Optional.of(ParseTree.leaf(name, Character.toString(ctx.next())));
} else {
return Optional.empty();
}

View file

@ -9,6 +9,7 @@ public final class MatchString extends Rule {
private final String s;
public MatchString(final String s) {
this.name = "MatchString";
this.s = s;
discard = true;
@ -18,9 +19,9 @@ public final class MatchString extends Rule {
public Optional<ParseTree> tryApply(final GrammarContext ctx) {
ctx.advanceToNextToken();
if (ctx.matches(s)) {
if (ctx.hasNext() && ctx.matches(s)) {
ctx.advance(s.length());
return Optional.of(ParseTree.leaf(s));
return Optional.of(ParseTree.leaf(name, s));
} else {
return Optional.empty();
}

View file

@ -10,6 +10,7 @@ public final class OneOf extends Rule {
private final Rule[] rules;
public OneOf(final Rule... rules) {
this.name = "OneOf";
this.rules = rules;
}

View file

@ -11,11 +11,13 @@ public class OneOrMore extends Rule {
private final Rule childRule;
public OneOrMore(final Rule childRule) {
this.name = "OneOrMore";
this.childRule = childRule;
}
@Override
public Optional<ParseTree> tryApply(final GrammarContext ctx) {
final int start = ctx.getCursor();
final List<ParseTree> children = new ArrayList<>();
final Optional<ParseTree> firstChild = childRule.apply(ctx);
@ -34,7 +36,7 @@ public class OneOrMore extends Rule {
}
}
return Optional.of(ParseTree.node(name, children));
return Optional.of(ParseTree.node(name, ctx.substring(start), children));
}
@Override

View file

@ -8,6 +8,8 @@ import java.util.Optional;
public abstract class Rule {
protected String name;
protected String text;
public boolean isComment = false;
protected boolean discard = false;

View file

@ -12,6 +12,10 @@ public class Rules {
return new AnyCharacter();
}
/**
 * Creates a rule that matches the next token of the input, whatever it is.
 *
 * @return a new {@link AnyToken} rule
 */
public static AnyToken anyToken() {
return new AnyToken();
}
public static MatchString string(final String s) {
return new MatchString(s);
}

View file

@ -12,6 +12,7 @@ public final class Sequence extends Rule {
private final Rule[] rules;
public Sequence(final Rule... rules) {
this.name = "Sequence";
this.rules = rules;
}
@ -21,11 +22,6 @@ public final class Sequence extends Rule {
final List<ParseTree> children = new ArrayList<>(rules.length);
for (final Rule rule : rules) {
if (!ctx.hasNext()) {
ctx.setCursor(start);
return Optional.empty();
}
final Optional<ParseTree> child = rule.apply(ctx);
if (child.isEmpty()) {
@ -36,7 +32,7 @@ public final class Sequence extends Rule {
}
}
return Optional.of(ParseTree.node(name, children));
return Optional.of(ParseTree.node(name, ctx.substring(start), children));
}
@Override

View file

@ -16,6 +16,7 @@ public final class TakeWhileCharacter extends Rule {
}
public TakeWhileCharacter(final Predicate<Character> characterPredicate) {
this.name = "TakeWhileCharacter";
this.characterPredicate = characterPredicate;
}
@ -33,7 +34,7 @@ public final class TakeWhileCharacter extends Rule {
}
if (ctx.getCursor() > start) {
return Optional.of(ParseTree.leaf(ctx.substring(start)));
return Optional.of(ParseTree.leaf(name, ctx.substring(start)));
} else {
return Optional.empty();
}

View file

@ -10,6 +10,7 @@ public final class UntilString extends Rule {
private final char[] charArr;
public UntilString(final String s) {
this.name = "UntilString";
this.charArr = s.toCharArray();
}
@ -29,9 +30,9 @@ public final class UntilString extends Rule {
}
if (!ctx.hasNext()) {
return Optional.of(ParseTree.leaf(ctx.substring(start)));
return Optional.of(ParseTree.leaf(name, ctx.substring(start)));
} else {
return Optional.of(ParseTree.leaf(ctx.substring(start, ctx.getCursor())));
return Optional.of(ParseTree.leaf(name, ctx.substring(start, ctx.getCursor())));
}
}

View file

@ -8,6 +8,10 @@ import java.util.Optional;
public final class Wrapper extends Rule {
private Rule childRule;
public Wrapper() {
this.name = "Wrapper";
}
public Rule getChildRule() {
return childRule;
}

View file

@ -11,11 +11,13 @@ public final class ZeroOrMore extends Rule {
private final Rule childRule;
public ZeroOrMore(final Rule childRule) {
this.name = "ZeroOrMore";
this.childRule = childRule;
}
@Override
public Optional<ParseTree> tryApply(final GrammarContext ctx) {
final int start = ctx.getCursor();
final List<ParseTree> children = new ArrayList<>();
while (ctx.hasNext()) {
@ -27,7 +29,7 @@ public final class ZeroOrMore extends Rule {
}
}
return Optional.of(ParseTree.node(name, children));
return Optional.of(ParseTree.node(name, ctx.substring(start), children));
}
@Override

View file

@ -9,6 +9,7 @@ public final class ZeroOrOne extends Rule {
private final Rule childRule;
public ZeroOrOne(final Rule childRule) {
this.name = "ZeroOrOne";
this.childRule = childRule;
}
@ -18,7 +19,7 @@ public final class ZeroOrOne extends Rule {
if (child.isPresent()) {
return child;
} else {
return Optional.of(ParseTree.leaf(""));
return Optional.of(ParseTree.leaf(name,""));
}
}

View file

@ -23,12 +23,12 @@ public class GrammarTest {
final var parseTree1 = grammar.parse("<tag1>");
assertTrue(parseTree1.isPresent());
assertEquals("tag1", parseTree1.get().child(0).getText());
assertEquals("tag1", parseTree1.get().getChild(0).getText());
final var parseTree2 = grammar.parse("<tag1 />");
assertTrue(parseTree2.isPresent());
assertEquals("tag1", parseTree2.get().child(0).getText());
assertEquals("tag1", parseTree2.get().getChild(0).getText());
}
@Test
@ -50,42 +50,42 @@ public class GrammarTest {
final var parseTree1 = grammar.parse("<tag1 id=123>");
assertTrue(parseTree1.isPresent());
assertEquals("tag1", parseTree1.get().child(0).getText());
assertEquals("id", parseTree1.get().child(1) // get the zeroOrMore rule
.child(0) // get the first attribute
.child(0) // get the first element in the sequence rule
assertEquals("tag1", parseTree1.get().getChild(0).getText());
assertEquals("id", parseTree1.get().getChild(1) // get the zeroOrMore rule
.getChild(0) // get the first attribute
.getChild(0) // get the first element in the sequence rule
.getText()
);
assertEquals("123", parseTree1.get().child(1) // get the zeroOrMore rule
.child(0) // get the first attribute
.child(1) // get the second child in the sequence
assertEquals("123", parseTree1.get().getChild(1) // get the zeroOrMore rule
.getChild(0) // get the first attribute
.getChild(1) // get the second child in the sequence
.getText()
);
final var parseTree2 = grammar.parse("<tag1 hello=world something=else/>");
assertTrue(parseTree2.isPresent());
assertEquals("tag1", parseTree2.get().child(0).getText());
assertEquals("tag1", parseTree2.get().getChild(0).getText());
assertEquals("hello", parseTree2.get().child(1) // get the zeroOrMore rule
.child(0) // get the first attribute
.child(0) // get the first child in the sequence
assertEquals("hello", parseTree2.get().getChild(1) // get the zeroOrMore rule
.getChild(0) // get the first attribute
.getChild(0) // get the first child in the sequence
.getText()
);
assertEquals("world", parseTree2.get().child(1) // get the zeroOrMore rule
.child(0) // get the first attribute
.child(1) // get the second child in the sequence
assertEquals("world", parseTree2.get().getChild(1) // get the zeroOrMore rule
.getChild(0) // get the first attribute
.getChild(1) // get the second child in the sequence
.getText()
);
assertEquals("something", parseTree2.get().child(1) // get the zeroOrMore rule
.child(1) // get the second attribute
.child(0) // get the first child in the sequence
assertEquals("something", parseTree2.get().getChild(1) // get the zeroOrMore rule
.getChild(1) // get the second attribute
.getChild(0) // get the first child in the sequence
.getText()
);
assertEquals("else", parseTree2.get().child(1) // get the zeroOrMore rule
.child(1) // get the second attribute
.child(1) // get the second child in the sequence
assertEquals("else", parseTree2.get().getChild(1) // get the zeroOrMore rule
.getChild(1) // get the second attribute
.getChild(1) // get the second child in the sequence
.getText()
);
}

View file

@ -23,12 +23,18 @@
<maven-surefire-plugin.version>3.0.0-M5</maven-surefire-plugin.version>
<maven-jar-plugin.version>3.2.0</maven-jar-plugin.version>
<maven-shade.plugin.version>3.2.2</maven-shade.plugin.version>
<jsr305.version>3.0.2</jsr305.version>
<mainClass>com.albertoventurini.parsley.Main</mainClass>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
<!-- Version comes from the jsr305.version property declared in <properties>. -->
<version>${jsr305.version}</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>