From 081c73fc2f094ea79bd4a5160a9a3b81e31f3cb2 Mon Sep 17 00:00:00 2001 From: breandan Date: Mon, 30 Oct 2023 04:31:27 -0400 Subject: [PATCH 1/3] render parse trees from LevBarHillel grammar --- .../kaliningraph/parsing/BarHillel.kt | 44 ++++++++++--------- .../ai/hypergraph/kaliningraph/parsing/CFG.kt | 2 +- .../kaliningraph/parsing/SeqValiant.kt | 2 +- .../hypergraph/kaliningraph/parsing/Tree.kt | 7 ++- .../ai/hypergraph/kaliningraph/types/Graph.kt | 21 +++++---- .../kaliningraph/parsing/BarHillelTest.kt | 25 +++++++++-- 6 files changed, 65 insertions(+), 36 deletions(-) diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt index 5794d088..c04ab387 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt @@ -1,15 +1,17 @@ package ai.hypergraph.kaliningraph.parsing import ai.hypergraph.kaliningraph.types.* -import kotlin.math.absoluteValue import kotlin.time.TimeSource -infix fun FSA.intersectLevFSA(cfg: CFG) = cfg.freeze().intersectLevFSAP(this) +infix fun FSA.intersectLevFSA(cfg: CFG) = cfg.intersectLevFSA(this) // http://www.cs.umd.edu/~gasarch/BLOGPAPERS/cfg.pdf#page=2 // https://browse.arxiv.org/pdf/2209.06809.pdf#page=5 infix fun CFG.intersectLevFSA(fsa: FSA): CFG = freeze().intersectLevFSAP(fsa) +fun CFG.makeLevGrammar(source: List<Σᐩ>, distance: Int) = + intersectLevFSA(makeLevFSA(source, distance, terminals)) + private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG { var clock = TimeSource.Monotonic.markNow() val initFinal = @@ -18,9 +20,9 @@ private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG { val transits = fsa.Q.map { (q, a, r) -> "[$q,$a,$r]" to listOf(a) } - fun Triple<Σᐩ, Σᐩ, Σᐩ>.isValid(nts: Triple<Σᐩ, Σᐩ, Σᐩ>): Boolean { + fun Triple<Σᐩ, Σᐩ, Σᐩ>.isCompatibleWith(nts: Triple<Σᐩ, Σᐩ, Σᐩ>): Boolean { fun Σᐩ.coords(): Pair = drop(2).run { - (length / 2).let {substring(0,it).toInt() to substring(it + 1).toInt() } + (length / 2).let { substring(0, it).toInt() to substring(it + 1).toInt() } } fun Pair.dominates(other: Pair) = @@ -37,15 +39,15 @@ private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG { fun IntRange.overlaps(other: IntRange) = (other.first in first..last) || (other.last in first..last) - fun parikhBounds(nt: Σᐩ): IntRange = parikhBounds[nt] ?: -1..-1 + fun lengthBounds(nt: Σᐩ): IntRange = lengthBounds[nt] ?: -1..-1 // "[$p,$A,$r] -> [$p,$B,$q] [$q,$C,$r]" fun isCompatible() = first.coords().dominates(second.coords()) && second.coords().dominates(third.coords()) - && parikhBounds(nts.first).overlaps(SPLP(first, third)) - && parikhBounds(nts.second).overlaps(SPLP(first, second)) - && parikhBounds(nts.third).overlaps(SPLP(second, third)) + && lengthBounds(nts.first).overlaps(SPLP(first, third)) + && lengthBounds(nts.second).overlaps(SPLP(first, second)) + && lengthBounds(nts.third).overlaps(SPLP(second, third)) return isCompatible() } @@ -63,7 +65,7 @@ private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG { triples // CFG ∩ FSA - in general we are not allowed to do this, but it works // because we assume a Levenshtein FSA, which is monotone and acyclic. - .filter { it.isValid(A to B to C) } + .filter { it.isCompatibleWith(A to B to C) } .map { (p, q, r) -> "[$p,$A,$r]" to listOf("[$p,$B,$q]", "[$q,$C,$r]") } }.flatten() @@ -112,7 +114,7 @@ fun CFG.dropVestigialProductions( // val reachable = reachableSymbols() val rw = toMutableSet() .apply { removeAll { prod -> prod.RHS.any { criteria(it) && it !in nts } } } - .freeze().removeUselessSymbols()//.removeUnreachable().freeze().removeNonGenerating() + .freeze().removeUselessSymbols() println("Removed ${size - rw.size} vestigial productions.") @@ -149,21 +151,21 @@ infix fun CFG.intersect(fsa: FSA): CFG { } // Tracks the number of tokens a given nonterminal can represent -// e.g., a NT with a Parikh bound of 1..3 can parse { s: Σ* | |s| ∈ [1, 3] } -val CFG.parikhBounds: Map<Σᐩ, IntRange> by cache { +// e.g., a NT with a bound of 1..3 can parse { s: Σ^[1, 3] } +val CFG.lengthBounds: Map<Σᐩ, IntRange> by cache { + val clock = TimeSource.Monotonic.markNow() val epsFree = noEpsilonOrNonterminalStubs.freeze() - val temp = List(20) { "_" } + val tpl = List(20) { "_" } val map = epsFree.nonterminals.associateWith { -1..-1 }.toMutableMap() - epsFree.initPForestMat(temp).seekFixpoint().diagonals - .mapIndexed { index, sets -> - val nonterminalsAtLevel = sets.flatMap { it.map { it.key } } - nonterminalsAtLevel.forEach { nt -> - map[nt]?.let { - (if (it.first < 0) (index + 1) else it.first)..(index + 1) - }?.let { map[nt] = it } - } + epsFree.initPForestMat(tpl).seekFixpoint().diagonals.mapIndexed { idx, sets -> + sets.flatMap { it.map { it.key } }.forEach { nt -> + map[nt]?.let { + (if (it.first < 0) (idx + 1) else it.first)..(idx + 1) + }?.let { map[nt] = it } } + } + println("Computed NT length bounds in ${clock.elapsedNow()}") map } \ No newline at end of file diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt index 673169ee..f120a501 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt @@ -24,7 +24,7 @@ val Production.RHS: List<Σᐩ> get() = * will be slow to compute the first time, but much faster on subsequent calls. * Storing the hashCode() in a field avoids recomputing it on every read. */ -fun CFG.freeze(): CFG = FrozenCFG(this) +fun CFG.freeze(): CFG = if (this is FrozenCFG) this else FrozenCFG(this) private class FrozenCFG(val cfg: CFG): CFG by cfg { val cfgId = cfg.hashCode() override fun equals(other: Any?) = diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt index 0cf574de..137e4d5d 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt @@ -81,7 +81,7 @@ class PTree(val root: String = "ε.", val branches: List<Π2A> = listOf() val (lb, rb) = shuffledBranches[remainder.toString().toInt()] val (l, quotient2) = lb.decodeTree(quotient1) val (r, quotient3) = rb.decodeTree(quotient2) - val concat = Tree(l.root, children = arrayOf(l, r)) + val concat = Tree(root, children = arrayOf(l, r)) return concat to quotient3 } diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Tree.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Tree.kt index b871da35..6681ee31 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Tree.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Tree.kt @@ -32,10 +32,13 @@ class Tree constructor( } ) { i, acc, it -> acc + it.toGraph("$j.$i") } + val indxInfo by lazy { if (span.first < Int.MAX_VALUE) " [${span.first}]" else "" } + val spanInfo by lazy { if (span.first < Int.MAX_VALUE) " [$span]" else "" } + fun prettyPrint(buffer: Σᐩ = "", prefix: Σᐩ = "", nextPrefix: Σᐩ = ""): Σᐩ = - if (children.isEmpty()) (buffer + prefix + "${terminal?.htmlify()} [${span.first}]\n") + if (children.isEmpty()) (buffer + prefix + "${terminal?.htmlify()}$indxInfo\n") else children.foldIndexed("$buffer$prefix" + root.htmlify() + - (if (-1 !in span) " [$span]" else "") + "\n") { i: Int, acc: Σᐩ, it: Tree -> + (if (-1 !in span) spanInfo else "") + "\n") { i: Int, acc: Σᐩ, it: Tree -> if (i == children.size - 1) it.prettyPrint(acc + "", "$nextPrefix└── ", "$nextPrefix ") else it.prettyPrint(acc, "$nextPrefix├── ", "$nextPrefix│ ") diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/types/Graph.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/types/Graph.kt index 5c8243e5..5ea217a7 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/types/Graph.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/types/Graph.kt @@ -188,18 +188,23 @@ val , E: IEdge, V: IVertex> IGraph // All pairs shortest path val , E: IEdge, V: IVertex> IGraph.APSP: Map, Int> by cache { val dist = mutableMapOf, Int>() - for (v in vertices) for (w in vertices) dist[v to w] = if (v == w) 0 else Int.MAX_VALUE - for (e in edges) dist[e.source to e.target] = 1 - for (k in vertices) for (i in vertices) for (j in vertices) { - val ik = dist[i to k]!! - val kj = dist[k to j]!! - val ij = dist[i to j]!! - if (ik != Int.MAX_VALUE && kj != Int.MAX_VALUE && ik + kj < ij) dist[i to j] = ik + kj + for ((u, v) in vertices * vertices) { + dist[v to u] = if (v == u) 0 else Int.MAX_VALUE + } + for (e in adjList) { dist[e.first to e.second] = 1 } + while (true) { + var done = true + for ((k, i, j) in vertices * vertices * vertices) { + if (dist[i to k]!! < Int.MAX_VALUE && dist[k to j]!! < Int.MAX_VALUE) { + val newDist = dist[i to k]!! + dist[k to j]!! + if (newDist < dist[i to j]!!) { dist[i to j] = newDist; done = false } + } + } + if (done) break } dist } - val , E: IEdge, V: IVertex> IGraph.degMap: Map by cache { vertices.associateWith { it.neighbors.size } } val , E: IEdge, V: IVertex> IGraph.edges: Set by cache { edgMap.values.flatten().toSet() } val , E: IEdge, V: IVertex> IGraph.edgList: List<Π2> by cache { vertices.flatMap { s -> s.outgoing.map { s to it } } } diff --git a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt index a8b6855e..85be2071 100644 --- a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt +++ b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt @@ -197,18 +197,37 @@ class BarHillelTest { val gram = Grammars.seq2parsePythonCFG.noEpsilonOrNonterminalStubs val toRepair = "NAME = ( NAME . NAME ( NAME NEWLINE".tokenizeByWhitespace() val levBall = makeLevFSA(toRepair, 3, gram.terminals) +// println(levBall.graph.toDot()) +// throw Exception("") val intGram = gram.intersectLevFSA(levBall) +// val part= intGram.nonterminals.map { it.substringAfter(',') +// .substringBefore(',') }.toSet().filter { it in gram.nonterminals } +// +// println("Part: $part") +// println("Nopart: ${gram.nonterminals - part}") + // .also { println("LEV ∩ CFG grammar:\n${it.pretty}") } +// println(intGram.prettyPrint()) val clock = TimeSource.Monotonic.markNow() val template = List(toRepair.size + 2) { "_" } - val lbhSet = intGram.enumSeq(template) - .onEach { - println(it) + + val lbhSet = intGram.enumSeq(template).onEachIndexed { i, it -> + if (i < 10) { + println(it) + val pf = intGram.enumTree(it.tokenizeByWhitespace()).toList() + println("Found " + pf.size + " parse trees") + println(pf.first().prettyPrint()) + println("\n\n") + } + assertTrue(it in gram.language) assertTrue(levBall.recognizes(it)) }.toList() +// Total trees in PTree: 29332695 +// Found 14785 solutions using Levenshtein/Bar-Hillel + println("Found ${lbhSet.size} solutions using Levenshtein/Bar-Hillel") println("Enumerative solver took ${clock.elapsedNow().inWholeMilliseconds}ms") From e766fee856a4a966a5f349cc1c3b1c2db4a9ba7c Mon Sep 17 00:00:00 2001 From: breandan Date: Mon, 30 Oct 2023 20:26:32 -0400 Subject: [PATCH 2/3] add missing knight arc and inference rules --- .../kaliningraph/graphs/LabeledGraph.kt | 5 +- .../ai/hypergraph/kaliningraph/parsing/FSA.kt | 3 + .../kaliningraph/parsing/Levenshtein.kt | 61 +++++++++++-------- .../ai/hypergraph/kaliningraph/types/Graph.kt | 5 +- .../kaliningraph/parsing/BarHillelTest.kt | 7 +-- 5 files changed, 47 insertions(+), 34 deletions(-) diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/graphs/LabeledGraph.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/graphs/LabeledGraph.kt index e00cc3a4..8a3f852b 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/graphs/LabeledGraph.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/graphs/LabeledGraph.kt @@ -101,10 +101,11 @@ open class LGVertex internal constructor( var occupied: Boolean = false constructor( - label: String = randomString(), + label: String = "#RGEN_" + randomString(), id: String = label, out: Set = emptySet() - ) : this(label = label, id = id, edgeMap = { s -> out.map { t -> LabeledEdge(s, t) }.toSet() }) + ) : this(label = label, id = id, edgeMap = { s -> + out.map { t -> LabeledEdge(s, t, label.substringBefore("#RGEN_")) }.toSet() }) constructor(lgv: LGVertex, edgeMap: (LGVertex) -> Set) : this(label = lgv.label, id = lgv.id, edgeMap = edgeMap) diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/FSA.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/FSA.kt index 6c8fe853..519595a2 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/FSA.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/FSA.kt @@ -31,6 +31,9 @@ data class FSA(val Q: TSA, val init: Set<Σᐩ>, val final: Set<Σᐩ>) { // println("$acc --$sym--> $nextStates") nextStates } intersect final).isNotEmpty() + + fun toDot() = + graph.toDot(graph.vertices.filter { it.label in final }.toSet()) } val TSA.states by cache { flatMap { listOf(it.π1, it.π3) }.toSet() } diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt index 0f8d779d..44ca8fef 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt @@ -76,43 +76,54 @@ fun makeLevFSA( fun pd(i: Int, digits: Int) = i.toString().padStart(digits, '0') +/* + s∈Σ i∈[0,n] j∈[1,k] +----------------------- + (q_i,j−1 -s→ q_i,j)∈δ +*/ + fun upArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>, digits: Int): TSA = ((0.. str.size <= i || str[i] != s } .filter { (i, j, _) -> i <= str.size || i - str.size < j } - .map { (i, j, s) -> i to j-1 to s to i to j } - .map { (a, b, s, d, e) -> - pd(a, digits) to pd(b, digits) to s to pd(d, digits) to pd(e, digits) - }.map { (a, b, s, d, e) -> - "q_$a/$b" to s to "q_$d/$e" - }.toSet() + .map { (i, j, s) -> i to j-1 to s to i to j }.postProc(digits) + +/* + s∈Σ i∈[1,n] j ∈[1,k] +------------------------- + (q_i−1,j−1 -s→ q_i,j)∈δ +*/ fun diagArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>, digits: Int): TSA = ((1.. str.size <= i - 1 || str[i-1] != s } .filter { (i, j, _) -> i <= str.size || i - str.size <= j } - .map { (i, j, s) -> i-1 to j-1 to s to i to j } - .map { (a, b, s, d, e) -> - pd(a, digits) to pd(b, digits) to s to pd(d, digits) to pd(e, digits) - }.map { (a, b, s, d, e) -> - "q_$a/$b" to s to "q_$d/$e" - }.toSet() + .map { (i, j, s) -> i-1 to j-1 to s to i to j }.postProc(digits) + +/* + s=σ_i i∈[1,n] j∈[0,k] +----------------------- + (q_i−1,j -s→ q_i,j)∈δ +*/ fun rightArcs(idx: Int, dist: Int, letter: Σᐩ, digits: Int): TSA = (setOf(idx + 1) * (0..dist).toSet() * setOf(letter)) - .map { (i, j, s) -> i-1 to j to s to i to j } - .map { (a, b, s, d, e) -> - pd(a, digits) to pd(b, digits) to s to pd(d, digits) to pd(e, digits) - }.map { (a, b, s, d, e) -> - "q_$a/$b" to s to "q_$d/$e" - }.toSet() + .map { (i, j, s) -> i-1 to j to s to i to j }.postProc(digits) + +/* + s=σ_i i∈[2,n] j∈[1,k] +------------------------- + (q_i−2,j−1 -s→ q_i,j)∈δ +*/ fun knightArcs(idx: Int, dist: Int, letter: Σᐩ, digits: Int): TSA = - if (idx <= 1) setOf() + if (idx < 1) setOf() else (setOf(idx + 1) * (1..dist).toSet() * setOf(letter)) - .map { (i, j, s) -> i-2 to j-1 to s to i to j } - .map { (a, b, s, d, e) -> - pd(a, digits) to pd(b, digits) to s to pd(d, digits) to pd(e, digits) - }.map { (a, b, s, d, e) -> - "q_$a/$b" to s to "q_$d/$e" - }.toSet() \ No newline at end of file + .map { (i, j, s) -> i-2 to j-1 to s to i to j }.postProc(digits) + +fun List<Π5>.postProc(digits: Int) = + map { (a, b, s, d, e) -> + pd(a, digits) to pd(b, digits) to s to pd(d, digits) to pd(e, digits) + }.map { (a, b, s, d, e) -> + "q_$a/$b" to s to "q_$d/$e" + }.toSet() \ No newline at end of file diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/types/Graph.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/types/Graph.kt index 5ea217a7..68d76e28 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/types/Graph.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/types/Graph.kt @@ -3,6 +3,7 @@ package ai.hypergraph.kaliningraph.types import ai.hypergraph.kaliningraph.* import ai.hypergraph.kaliningraph.cache.LRUCache import ai.hypergraph.kaliningraph.graphs.* +import ai.hypergraph.kaliningraph.parsing.Σᐩ import ai.hypergraph.kaliningraph.tensor.* import ai.hypergraph.kaliningraph.theory.wl import kotlin.js.JsName @@ -147,7 +148,7 @@ interface IGraph: IGF, Set, Encodable fun asString() = edgList.map { "${it.first} -> ${it.second.target}" }.formatAsGrid().toString() - fun toDot(): String { + fun toDot(highlight: Set = setOf()): String { fun String.htmlify() = replace("<", "<").replace(">", ">") return """ @@ -155,7 +156,7 @@ interface IGraph: IGF, Set, Encodable graph ["concentrate"="true","rankdir"="LR","bgcolor"="transparent","margin"="0.0","compound"="true","nslimit"="20"] ${ vertices.joinToString("\n") { - """"${it.id.htmlify()}" ["color"="black","fontcolor"="black","fontname"="JetBrains Mono","fontsize"="15","penwidth"="2.0","shape"="Mrecord"]""" } + """"${it.id.htmlify()}" ["color"="black","fontcolor"="black","fontname"="JetBrains Mono","fontsize"="15","penwidth"="2.0","shape"="Mrecord"${if(it in highlight)""","fillcolor"="lightgray","style"="filled"""" else ""}]""" } } ${edgList.joinToString("\n") { (v, e) -> """"${v.id.htmlify()}" -> "${e.target.id.htmlify()}" ["color"="${ if (v is LGVertex && v.occupied) "red" else "black" }","arrowhead"="normal","penwidth"="2.0","label"="${(e as? LabeledEdge)?.label ?: ""}"]""" } diff --git a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt index 85be2071..977ade6c 100644 --- a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt +++ b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt @@ -1,10 +1,7 @@ package ai.hypergraph.kaliningraph.parsing +import Grammars import ai.hypergraph.kaliningraph.* -import ai.hypergraph.kaliningraph.sampling.pow -import com.ionspin.kotlin.bignum.decimal.* -import com.ionspin.kotlin.bignum.integer.toBigInteger -import kotlin.math.pow import kotlin.test.* import kotlin.time.* @@ -197,7 +194,7 @@ class BarHillelTest { val gram = Grammars.seq2parsePythonCFG.noEpsilonOrNonterminalStubs val toRepair = "NAME = ( NAME . NAME ( NAME NEWLINE".tokenizeByWhitespace() val levBall = makeLevFSA(toRepair, 3, gram.terminals) -// println(levBall.graph.toDot()) +// println(levBall.toDot()) // throw Exception("") val intGram = gram.intersectLevFSA(levBall) // val part= intGram.nonterminals.map { it.substringAfter(',') From f8565c47f3334054108954a8c94d3f2a00c2dfde Mon Sep 17 00:00:00 2001 From: breandan Date: Sat, 4 Nov 2023 09:33:12 -0400 Subject: [PATCH 3/3] compare solver times --- .../kaliningraph/parsing/BarHillel.kt | 5 ++- .../kaliningraph/parsing/Levenshtein.kt | 17 ++++---- .../kaliningraph/parsing/SeqValiant.kt | 2 +- .../kaliningraph/parsing/SetValiant.kt | 4 +- .../kaliningraph/parsing/SortValiant.kt | 4 +- .../kaliningraph/parsing/BarHillelTest.kt | 41 ++++++++++++++----- .../kaliningraph/parsing/Grammars.kt | 6 +++ .../kaliningraph/parsing/SetValiantTest.kt | 30 +++++++++++--- .../kaliningraph/sat/SATValiantTest.kt | 3 +- 9 files changed, 79 insertions(+), 33 deletions(-) diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt index c04ab387..60509942 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt @@ -12,6 +12,9 @@ infix fun CFG.intersectLevFSA(fsa: FSA): CFG = freeze().intersectLevFSAP(fsa) fun CFG.makeLevGrammar(source: List<Σᐩ>, distance: Int) = intersectLevFSA(makeLevFSA(source, distance, terminals)) +fun CFG.barHillelRepair(prompt: List<Σᐩ>, distance: Int) = + makeLevGrammar(prompt, distance).enumSeq(List(prompt.size + distance) { "_" }) + private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG { var clock = TimeSource.Monotonic.markNow() val initFinal = @@ -116,7 +119,7 @@ fun CFG.dropVestigialProductions( .apply { removeAll { prod -> prod.RHS.any { criteria(it) && it !in nts } } } .freeze().removeUselessSymbols() - println("Removed ${size - rw.size} vestigial productions.") + println("Removed ${size - rw.size} vestigial productions, resulting in ${rw.size} productions.") return if (rw.size == size) this else rw.dropVestigialProductions(criteria) } diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt index 44ca8fef..12098418 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt @@ -59,7 +59,8 @@ fun makeLevFSA( (upArcs(str, dist, alphabet, digits) + diagArcs(str, dist, alphabet, digits) + str.mapIndexed { i, it -> rightArcs(i, dist, it, digits) }.flatten() + - str.mapIndexed { i, it -> knightArcs(i, dist, it, digits) }.flatten()).let { Q -> + str.mapIndexed { i, it -> knightArcs(i, dist, it, digits) }.flatten()) + .let { Q -> val initialStates = setOf("q_" + pd(0, digits).let { "$it/$it" }) fun Σᐩ.unpackCoordinates() = substringAfter('_').split('/') @@ -83,10 +84,10 @@ fun pd(i: Int, digits: Int) = i.toString().padStart(digits, '0') */ fun upArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>, digits: Int): TSA = - ((0.. str.size <= i || str[i] != s } .filter { (i, j, _) -> i <= str.size || i - str.size < j } - .map { (i, j, s) -> i to j-1 to s to i to j }.postProc(digits) + .map { (i, j, s) -> i to j - 1 to s to i to j }.postProc(digits) /* s∈Σ i∈[1,n] j ∈[1,k] @@ -95,10 +96,10 @@ fun upArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>, digits: Int): TSA */ fun diagArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>, digits: Int): TSA = - ((1.. str.size <= i - 1 || str[i-1] != s } + ((1.. str.size <= i - 1 || str[i - 1] != s } .filter { (i, j, _) -> i <= str.size || i - str.size <= j } - .map { (i, j, s) -> i-1 to j-1 to s to i to j }.postProc(digits) + .map { (i, j, s) -> i - 1 to j - 1 to s to i to j }.postProc(digits) /* s=σ_i i∈[1,n] j∈[0,k] @@ -108,7 +109,7 @@ fun diagArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>, digits: Int): TS fun rightArcs(idx: Int, dist: Int, letter: Σᐩ, digits: Int): TSA = (setOf(idx + 1) * (0..dist).toSet() * setOf(letter)) - .map { (i, j, s) -> i-1 to j to s to i to j }.postProc(digits) + .map { (i, j, s) -> i - 1 to j to s to i to j }.postProc(digits) /* s=σ_i i∈[2,n] j∈[1,k] @@ -119,7 +120,7 @@ fun rightArcs(idx: Int, dist: Int, letter: Σᐩ, digits: Int): TSA = fun knightArcs(idx: Int, dist: Int, letter: Σᐩ, digits: Int): TSA = if (idx < 1) setOf() else (setOf(idx + 1) * (1..dist).toSet() * setOf(letter)) - .map { (i, j, s) -> i-2 to j-1 to s to i to j }.postProc(digits) + .map { (i, j, s) -> i - 2 to j - 1 to s to i to j }.postProc(digits) fun List<Π5>.postProc(digits: Int) = map { (a, b, s, d, e) -> diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt index 137e4d5d..4c2c0586 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt @@ -95,7 +95,7 @@ class PTree(val root: String = "ε.", val branches: List<Π2A> = listOf() fun sampleStrWithoutReplacement(): Sequence = sequence { println("Total trees in PTree: $totalTrees") var i = BigInteger.ZERO - while (i < totalTrees) yield(decodeString(i++).first) + while (i < 3 * totalTrees) yield(decodeString(i++).first) }.distinct() // Samples instantaneously from the parse forest, but may return duplicates diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiant.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiant.kt index 4d9a6d14..3dd318c0 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiant.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiant.kt @@ -2,8 +2,8 @@ package ai.hypergraph.kaliningraph.parsing +import ai.hypergraph.kaliningraph.* import ai.hypergraph.kaliningraph.sampling.* -import ai.hypergraph.kaliningraph.splitProd import ai.hypergraph.kaliningraph.tensor.* import ai.hypergraph.kaliningraph.types.* @@ -302,7 +302,7 @@ fun List<Σᐩ>.solve( ): Sequence<Σᐩ> = genCandidates(CFG, fillers) // .also { println("Solving (Complexity: ${fillers.size.pow(count { it == "_" })}): ${joinToString(" ")}") } - .takeWhile { takeMoreWhile() }.filter { it.matches(CFG) } + .takeWhile { takeMoreWhile() }.filter { it.matches(CFG) }.map { it.removeEpsilon() } fun List<Σᐩ>.genCandidates(CFG: CFG, fillers: Set<Σᐩ> = CFG.terminals): Sequence<Σᐩ> = MDSamplerWithoutReplacement(fillers, count { it == HOLE_MARKER }).map { diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SortValiant.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SortValiant.kt index 64b3401c..14fc8ad0 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SortValiant.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SortValiant.kt @@ -87,8 +87,8 @@ data class Choice(val tokens: List<Σᐩ>, val weight: Float): Comparable = - compareBy { it.weight }.thenBy { it.sanitized.size }.thenBy { it.asString } + val comparator: Comparator = compareBy { it.weight } + .thenBy { it.sanitized.size }.thenBy { it.asString } } override fun compareTo(other: Choice): Int = comparator.compare(this, other) diff --git a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt index 977ade6c..dc6a2015 100644 --- a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt +++ b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillelTest.kt @@ -2,6 +2,7 @@ package ai.hypergraph.kaliningraph.parsing import Grammars import ai.hypergraph.kaliningraph.* +import ai.hypergraph.kaliningraph.sampling.all import kotlin.test.* import kotlin.time.* @@ -210,17 +211,17 @@ class BarHillelTest { val template = List(toRepair.size + 2) { "_" } val lbhSet = intGram.enumSeq(template).onEachIndexed { i, it -> - if (i < 10) { - println(it) - val pf = intGram.enumTree(it.tokenizeByWhitespace()).toList() - println("Found " + pf.size + " parse trees") - println(pf.first().prettyPrint()) - println("\n\n") - } - - assertTrue(it in gram.language) - assertTrue(levBall.recognizes(it)) - }.toList() + if (i < 10) { + println(it) + val pf = intGram.enumTree(it.tokenizeByWhitespace()).toList() + println("Found " + pf.size + " parse trees") + println(pf.first().prettyPrint()) + println("\n\n") + } + + assertTrue(it in gram.language) + assertTrue(levBall.recognizes(it)) + }.toList() // Total trees in PTree: 29332695 // Found 14785 solutions using Levenshtein/Bar-Hillel @@ -266,4 +267,22 @@ class BarHillelTest { println("Inverse CFL density (Σ^$n/|T($n)|): ~1/${it.inverseDensity}") } } + +/* +./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.parsing.BarHillelTest.testToyArith" +*/ + @Test + fun testToyArith() { + val prompt = ") ( (".tokenizeByWhitespace() + val overwrittenRepairs = + Grammars.toyArith.barHillelRepair(prompt, 3).toSet() + .also { println("Found ${it.size} overwritten repairs.") } + + val allTriples = Grammars.toyArith.solveSeq(List(3) { "_" }) + .distinct().toSet().also { println("Found ${it.size} total triples.") } + + val allTriplesMinusOverwritten = overwrittenRepairs - allTriples + allTriplesMinusOverwritten.forEach { println(it) } + println("Found ${allTriplesMinusOverwritten.size} non-overwritten triples.") + } } \ No newline at end of file diff --git a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/Grammars.kt b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/Grammars.kt index b89138a2..d340d196 100644 --- a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/Grammars.kt +++ b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/Grammars.kt @@ -19,6 +19,12 @@ object Grammars { N -> ! """.trimIndent().parseCFG().noNonterminalStubs + val toyArith = """ + S -> S + S | S * S | S - S | S / S | ( S ) | - S + S -> 0 | 1 | 2 | 3 | 4 + S -> X | Y | Z + """.trimIndent().parseCFG().noNonterminalStubs + val ocamlCFG = """ S -> X X -> A | V | ( X , X ) | X X | ( X ) diff --git a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt index 5e8a3965..62991642 100644 --- a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt +++ b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt @@ -1,5 +1,6 @@ package ai.hypergraph.kaliningraph.parsing +import Grammars.toyArith import ai.hypergraph.kaliningraph.* import ai.hypergraph.kaliningraph.tensor.seekFixpoint import ai.hypergraph.kaliningraph.types.π2 @@ -78,16 +79,12 @@ class SetValiantTest { */ @Test fun testArithmetic() { - """ - S -> S + S | S * S | S - S | S / S | ( S ) - S -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 - S -> X | Y | Z - """.let { cfg -> + toyArith.let { cfg -> assertTrue("( 1 + 2 * 3 ) / 4".matches(cfg)) assertFalse("( 1 + 2 * 3 ) - ) / 4".matches(cfg)) assertFalse("( 1 + 2 * 3 ) - ( ) / 4".matches(cfg)) println(cfg.parse("( 1 + ( 2 * 3 ) ) / 4")?.prettyPrint()) - println(cfg.parseCFG().prettyPrint()) + println(cfg.prettyPrint()) } } @@ -513,4 +510,25 @@ class SetValiantTest { .reduce { a, b -> union(a, b) } }.also { println("Merged a 10^6 bitvecs in ${it.inWholeMilliseconds}ms.") } // Should be ~5000ms } + +/* +./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.parsing.SetValiantTest.testCompareSolvers" +*/ + @Test + fun testCompareSolvers() { + val prompt = "_ _ ( _ _ _".tokenizeByWhitespace() + val enumSeq = measureTimedValue { toyArith.enumSeq(prompt).toSet() } + val solveSeq = measureTimedValue { toyArith.solveSeq(prompt).toSet() } + val origSet = measureTimedValue { prompt.solve(toyArith).toSet() } + +// EnumSeq: 584 (842.693834ms) +// SolvSeq: 584 (3.802375ms) +// SetCYK: 584 (7.388834667s) + enumSeq.also { println("EnumSeq: ${it.value.size} (${it.duration})") }.value + solveSeq.also { println("SolvSeq: ${it.value.size} (${it.duration})") }.value + origSet.also { println("SetCYK: ${it.value.size} (${it.duration})") }.value + + assertEquals(origSet.value, enumSeq.value, "EnumSeq was missing:" + (origSet.value - enumSeq.value)) + assertEquals(origSet.value, solveSeq.value) + } } \ No newline at end of file diff --git a/src/jvmTest/kotlin/ai/hypergraph/kaliningraph/sat/SATValiantTest.kt b/src/jvmTest/kotlin/ai/hypergraph/kaliningraph/sat/SATValiantTest.kt index e333fd37..33a63731 100644 --- a/src/jvmTest/kotlin/ai/hypergraph/kaliningraph/sat/SATValiantTest.kt +++ b/src/jvmTest/kotlin/ai/hypergraph/kaliningraph/sat/SATValiantTest.kt @@ -7,7 +7,6 @@ import ai.hypergraph.kaliningraph.types.* import ai.hypergraph.kaliningraph.visualization.show import org.junit.jupiter.api.Test import kotlin.test.* -import kotlin.time.TimeSource /* ./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.sat.SATValiantTest" @@ -884,7 +883,7 @@ class SATValiantTest { /* ./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.sat.SATValiantTest.testLongTerminals" - */ +*/ @Test fun testLongTerminals() { println("START -> A B C D E F G H I".parseCFG().prettyPrint())