Skip to content

Commit

Permalink
add Bar-Hillel parser
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Oct 13, 2023
1 parent f798ffd commit bd5ad7e
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 146 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ infix fun CFG.intersectLevFSA(fsa: FSA): CFG {
second.coords().dominates(third.coords())
}

// For each production A → BC in P , for every p, q, r ∈ Q,
// For each production A → BC in P, for every p, q, r ∈ Q,
// we have the production [p,A,r] → [p,B,q] [q,C,r] in P′.
val binaryProds =
nonterminalProductions.map {
Expand Down Expand Up @@ -56,4 +56,42 @@ infix fun CFG.intersectLevFSA(fsa: FSA): CFG {
.also { println("∩-grammar construction took: ${clock.elapsedNow().inWholeMilliseconds}ms") }
// .also { println(it.pretty) }
// .also { println(it.size) }
}


/** Receiver-flipped convenience form: `fsa intersect cfg` delegates to `cfg intersect fsa`. */
infix fun FSA.intersect(cfg: CFG): CFG = cfg intersect this

/**
 * Builds a grammar for the intersection of this CFG with [fsa] using the
 * triple construction described in the comments below: every generated
 * nonterminal has the form `[p,X,q]`, where `p` and `q` are FSA states.
 *
 * The rules are assembled as text, parsed with `parseCFG(normalize = false)`,
 * and then passed through `removeVestigalProductions()`, `normalForm` and
 * `noNonterminalStubs`. Three diagnostic lines (total production count,
 * remaining production count, elapsed milliseconds) are printed to stdout.
 *
 * @param fsa the automaton whose transitions (`Q`), `init`, `final` and
 *   `states` drive the construction.
 * @return the post-processed intersection grammar.
 */
infix fun CFG.intersect(fsa: FSA): CFG {
  val clock = TimeSource.Monotonic.markNow()

  // One start rule for every (initial, final) state pair.
  val initFinal =
    (fsa.init * fsa.final).map { (q, r) -> "START -> [$q,START,$r]" }

  // One rule per FSA transition, rewriting the triple to its own symbol.
  val transits =
    fsa.Q.map { (q, a, r) -> "[$q,$a,$r] -> $a" }

  // Every (p, q, r) combination of states. It does not depend on the
  // production being expanded, so it is built once instead of per production.
  val triples = fsa.states * fsa.states * fsa.states

  // For each production A → BC in P, for every p, q, r ∈ Q,
  // we have the production [p,A,r] → [p,B,q] [q,C,r] in P′.
  val binaryProds =
    nonterminalProductions.flatMap { (A, rhs) ->
      val B = rhs[0]
      val C = rhs[1]
      triples.map { (p, q, r) -> "[$p,$A,$r] -> [$p,$B,$q] [$q,$C,$r]" }
    }

  // For every production A → σ in P, for every (p, σ, q) ∈ Q × Σ × Q
  // such that δ(p, σ) = q we have the production [p, A, q] → σ in P′.
  val unitProds =
    unitProductions.flatMap { (A, rhs) ->
      fsa.Q.filter { it.π2 == rhs[0] }
        .map { (p, σ, q) -> "[$p,$A,$q] -> $σ" }
    }

  return (initFinal + transits + binaryProds + unitProds).joinToString("\n")
    .parseCFG(normalize = false)
    .also { println("∩-grammar has ${it.size} total productions") }
    .removeVestigalProductions().normalForm.noNonterminalStubs
    .also { println("∩-grammar has ${it.size} useful productions") }
    .also { println("∩-grammar construction took: ${clock.elapsedNow().inWholeMilliseconds}ms") }
//    .also { println(it.pretty) }
//    .also { println(it.size) }
}
32 changes: 31 additions & 1 deletion src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/FSA.kt
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,34 @@ data class FSA(val Q: TSA, val init: Set<Σᐩ>, val final: Set<Σᐩ>) {
} intersect final).isNotEmpty()
}

val TSA.states by cache { flatMap { listOf(it.π1, it.π3) }.toSet() }
// Distinct states mentioned by this transition collection: the first (π1) and
// third (π3) component of every element, gathered into a set. The value is
// supplied through the `cache` property delegate.
val TSA.states by cache { flatMap { listOf(it.π1, it.π3) }.toSet() }

// FSAs look like this:
/*
INIT -> 1 | 3
DONE -> 4
1 -<a>-> 1
1 -<+>-> 3
3 -<b>-> 4
4 -<+>-> 1
4 -<b>-> 4
*/

/**
 * Parses a line-oriented automaton description into an [FSA].
 *
 * Each non-blank line is split on `->`:
 *  - `INIT -> 1 | 3` declares the initial states,
 *  - `DONE -> 4` declares the final states,
 *  - `1 -<a>-> 3` declares a transition from `1` to `3` labelled `a`.
 *
 * The right-hand side may list several `|`-separated targets, producing one
 * entry per target. A line whose left-hand side carries no `-<...>` label
 * gets the empty string as its symbol. Every parsed entry is printed to
 * stdout.
 *
 * @return an [FSA] whose transitions exclude the `INIT`/`DONE` pseudo-entries,
 *   with `init` and `final` taken from the targets of those entries.
 */
fun Σᐩ.parseFSA(): FSA {
  // Pseudo-sources that declare initial/final states rather than transitions.
  val reserved = setOf("INIT", "DONE")

  val Q =
    lines().asSequence()
      .filter { it.isNotBlank() }
      .map { it.split("->") }
      .map { (rawLhs, rhs) ->
        // Trim first: with whitespace between the closing '>' and the arrow
        // (e.g. "1 -<a> -> 2") the endsWith(">") check below would otherwise
        // fail and the label would silently be dropped.
        val lhs = rawLhs.trim()
        val src = lhs.tokenizeByWhitespace().first()
        val dst = rhs.split("|").map { it.trim() }.toSet()
        val sym =
          if ("-<" in lhs && lhs.endsWith(">")) lhs.split("-<").last().dropLast(1)
          else ""

        setOf(src) * setOf(sym) * dst
      }.flatten().toList()
      .onEach { println(it) }

  val init = Q.filter { it.π1 == "INIT" }.map { it.π3 }.toSet()
  val final = Q.filter { it.π1 == "DONE" }.map { it.π3 }.toSet()
  return FSA(Q.filter { it.π1 !in reserved }.toSet(), init, final)
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class PTree(val root: String = "ε", val branches: List<Π2A<PTree>> = listOf())
// Samples instantaneously from the parse forest, but may return duplicates
// and only returns a fraction of the number of distinct strings when compared
// to SWOR on medium-sized finite sets under the same wall-clock timeout. If
// the set is sufficiently large, distinctness will never a problem.
// the set is sufficiently large, distinctness will never be a problem.
fun sampleWithReplacement(): Sequence<String> = sequence { while(true) yield(sample()) }

fun sample(): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,139 +13,6 @@ class BarHillelTest {
*/
@Test
fun testManualBarHillel() {
// Generated from https://github.com/breandan/bar-hillel/blob/7527d2ad1a007fb4667b85cec295a43c56a237db/rayuela/test/cfg/test_epsilon_Bar_Hillel.py
val bhcfg = """
START -> [1,START,4]
START -> [3,START,4]
[1,+,3] -> +
[1,+,3] -> *
[1,L,1] -> [1,O,1] [1,N,1]
[1,L,1] -> [1,O,3] [3,N,1]
[1,L,1] -> [1,O,4] [4,N,1]
[1,L,3] -> [1,O,1] [1,N,3]
[1,L,3] -> [1,O,3] [3,N,3]
[1,L,3] -> [1,O,4] [4,N,3]
[1,L,4] -> [1,O,1] [1,N,4]
[1,L,4] -> [1,O,3] [3,N,4]
[1,L,4] -> [1,O,4] [4,N,4]
[1,N,1] -> [1,a,1]
[1,N,1] -> [1,b,1]
[1,N,1] -> [1,N,1] [1,N,1]
[1,N,1] -> [1,N,3] [3,N,1]
[1,N,1] -> [1,N,4] [4,N,1]
[1,N,3] -> [1,a,3]
[1,N,3] -> [1,b,3]
[1,N,3] -> [1,N,1] [1,N,3]
[1,N,3] -> [1,N,3] [3,N,3]
[1,N,3] -> [1,N,4] [4,N,3]
[1,N,4] -> [1,a,4]
[1,N,4] -> [1,b,4]
[1,N,4] -> [1,N,1] [1,N,4]
[1,N,4] -> [1,N,3] [3,N,4]
[1,N,4] -> [1,N,4] [4,N,4]
[1,O,1] -> [1,x,1]
[1,O,1] -> [1,+,1]
[1,O,3] -> [1,x,3]
[1,O,3] -> [1,+,3]
[1,O,4] -> [1,x,4]
[1,O,4] -> [1,+,4]
[1,START,1] -> [1,N,1] [1,L,1]
[1,START,1] -> [1,N,3] [3,L,1]
[1,START,1] -> [1,N,4] [4,L,1]
[1,START,3] -> [1,N,1] [1,L,3]
[1,START,3] -> [1,N,3] [3,L,3]
[1,START,3] -> [1,N,4] [4,L,3]
[1,START,4] -> [1,N,1] [1,L,4]
[1,START,4] -> [1,N,3] [3,L,4]
[1,START,4] -> [1,N,4] [4,L,4]
[1,a,1] -> a
[3,L,1] -> [3,O,1] [1,N,1]
[3,L,1] -> [3,O,3] [3,N,1]
[3,L,1] -> [3,O,4] [4,N,1]
[3,L,3] -> [3,O,1] [1,N,3]
[3,L,3] -> [3,O,3] [3,N,3]
[3,L,3] -> [3,O,4] [4,N,3]
[3,L,4] -> [3,O,1] [1,N,4]
[3,L,4] -> [3,O,3] [3,N,4]
[3,L,4] -> [3,O,4] [4,N,4]
[3,N,1] -> [3,a,1]
[3,N,1] -> [3,b,1]
[3,N,1] -> [3,N,1] [1,N,1]
[3,N,1] -> [3,N,3] [3,N,1]
[3,N,1] -> [3,N,4] [4,N,1]
[3,N,3] -> [3,a,3]
[3,N,3] -> [3,b,3]
[3,N,3] -> [3,N,1] [1,N,3]
[3,N,3] -> [3,N,3] [3,N,3]
[3,N,3] -> [3,N,4] [4,N,3]
[3,N,4] -> [3,a,4]
[3,N,4] -> [3,b,4]
[3,N,4] -> [3,N,1] [1,N,4]
[3,N,4] -> [3,N,3] [3,N,4]
[3,N,4] -> [3,N,4] [4,N,4]
[3,O,1] -> [3,x,1]
[3,O,1] -> [3,+,1]
[3,O,3] -> [3,x,3]
[3,O,3] -> [3,+,3]
[3,O,4] -> [3,x,4]
[3,O,4] -> [3,+,4]
[3,START,1] -> [3,N,1] [1,L,1]
[3,START,1] -> [3,N,3] [3,L,1]
[3,START,1] -> [3,N,4] [4,L,1]
[3,START,3] -> [3,N,1] [1,L,3]
[3,START,3] -> [3,N,3] [3,L,3]
[3,START,3] -> [3,N,4] [4,L,3]
[3,START,4] -> [3,N,1] [1,L,4]
[3,START,4] -> [3,N,3] [3,L,4]
[3,START,4] -> [3,N,4] [4,L,4]
[3,b,4] -> b
[4,+,1] -> +
[4,+,1] -> *
[4,L,1] -> [4,O,1] [1,N,1]
[4,L,1] -> [4,O,3] [3,N,1]
[4,L,1] -> [4,O,4] [4,N,1]
[4,L,3] -> [4,O,1] [1,N,3]
[4,L,3] -> [4,O,3] [3,N,3]
[4,L,3] -> [4,O,4] [4,N,3]
[4,L,4] -> [4,O,1] [1,N,4]
[4,L,4] -> [4,O,3] [3,N,4]
[4,L,4] -> [4,O,4] [4,N,4]
[4,N,1] -> [4,a,1]
[4,N,1] -> [4,b,1]
[4,N,1] -> [4,N,1] [1,N,1]
[4,N,1] -> [4,N,3] [3,N,1]
[4,N,1] -> [4,N,4] [4,N,1]
[4,N,3] -> [4,a,3]
[4,N,3] -> [4,b,3]
[4,N,3] -> [4,N,1] [1,N,3]
[4,N,3] -> [4,N,3] [3,N,3]
[4,N,3] -> [4,N,4] [4,N,3]
[4,N,4] -> [4,a,4]
[4,N,4] -> [4,b,4]
[4,N,4] -> [4,N,1] [1,N,4]
[4,N,4] -> [4,N,3] [3,N,4]
[4,N,4] -> [4,N,4] [4,N,4]
[4,O,1] -> [4,x,1]
[4,O,1] -> [4,+,1]
[4,O,3] -> [4,x,3]
[4,O,3] -> [4,+,3]
[4,O,4] -> [4,x,4]
[4,O,4] -> [4,+,4]
[4,START,1] -> [4,N,1] [1,L,1]
[4,START,1] -> [4,N,3] [3,L,1]
[4,START,1] -> [4,N,4] [4,L,1]
[4,START,3] -> [4,N,1] [1,L,3]
[4,START,3] -> [4,N,3] [3,L,3]
[4,START,3] -> [4,N,4] [4,L,3]
[4,START,4] -> [4,N,1] [1,L,4]
[4,START,4] -> [4,N,3] [3,L,4]
[4,START,4] -> [4,N,4] [4,L,4]
[4,b,4] -> b
""".trimIndent().parseCFG().noNonterminalStubs

println(bhcfg.pretty)

// bhcfg is the intersection of
val cfg = """
START -> N L
N -> N N | a | b
Expand All @@ -155,13 +22,18 @@ class BarHillelTest {
""".parseCFG().noNonterminalStubs

val fsa = """
INIT: 1, 3 FINAL: 4
1 -[a]-> 1
1 -[+]-> 3
3 -[b]-> 4
4 -[+]-> 1
4 -[b]-> 4
""".trimIndent()
INIT -> 1 | 3
DONE -> 4
1 -<a>-> 1
1 -<+>-> 3
1 -<*>-> 3
3 -<b>-> 4
4 -<+>-> 1
4 -<*>-> 1
4 -<b>-> 4
""".parseFSA()

val bhcfg = cfg.intersect(fsa)

val fsaCfg = """
START -> START b | 3 b
Expand Down Expand Up @@ -191,7 +63,7 @@ class BarHillelTest {
assertTrue { it in bhcfg.language }
assertTrue { it in fsaCfg.language }
assertTrue { it in cfg.language }
}.take(300).toList().also { println("Found ${it.size} solutions.") }
}.take(300).toList().also { println("Sampling solver found ${it.size} solutions.") }
}.also { println("Sampling solver took: ${it.inWholeMilliseconds}ms") }

clock = TimeSource.Monotonic.markNow()
Expand All @@ -201,7 +73,7 @@ class BarHillelTest {
assertTrue { it in bhcfg.language }
assertTrue { it in fsaCfg.language }
assertTrue { it in cfg.language }
}.toList().also { println("Found ${it.size} solutions.") }
}.toList().also { println("Sequential solver found ${it.size} solutions.") }
}.also { println("Sequential solver took: ${it.inWholeMilliseconds}ms") }

clock = TimeSource.Monotonic.markNow()
Expand All @@ -211,7 +83,7 @@ class BarHillelTest {
assertTrue { it in bhcfg.language }
assertTrue { it in fsaCfg.language }
assertTrue { it in cfg.language }
}.toList().also { println("Found ${it.size} solutions.") }
}.toList().also { println("Sort solver found ${it.size} solutions.") }
}.also { println("Sort solver took: ${it.inWholeMilliseconds}ms") }
}

Expand Down

0 comments on commit bd5ad7e

Please sign in to comment.