Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Nov 5, 2023
2 parents 23ac634 + f8565c4 commit 9e1c4ac
Show file tree
Hide file tree
Showing 14 changed files with 177 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,11 @@ open class LGVertex internal constructor(
var occupied: Boolean = false

constructor(
label: String = randomString(),
label: String = "#RGEN_" + randomString(),
id: String = label,
out: Set<LGVertex> = emptySet()
) : this(label = label, id = id, edgeMap = { s -> out.map { t -> LabeledEdge(s, t) }.toSet() })
) : this(label = label, id = id, edgeMap = { s ->
out.map { t -> LabeledEdge(s, t, label.substringBefore("#RGEN_")) }.toSet() })

constructor(lgv: LGVertex, edgeMap: (LGVertex) -> Set<LabeledEdge>) :
this(label = lgv.label, id = lgv.id, edgeMap = edgeMap)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
package ai.hypergraph.kaliningraph.parsing

import ai.hypergraph.kaliningraph.types.*
import kotlin.math.absoluteValue
import kotlin.time.TimeSource

infix fun FSA.intersectLevFSA(cfg: CFG) = cfg.freeze().intersectLevFSAP(this)
infix fun FSA.intersectLevFSA(cfg: CFG) = cfg.intersectLevFSA(this)
// http://www.cs.umd.edu/~gasarch/BLOGPAPERS/cfg.pdf#page=2
// https://browse.arxiv.org/pdf/2209.06809.pdf#page=5

/**
 * Intersects this grammar with [fsa]: the grammar is frozen first, then handed
 * to the private `intersectLevFSAP` routine which builds the resulting grammar.
 */
infix fun CFG.intersectLevFSA(fsa: FSA): CFG =
  freeze() intersectLevFSAP fsa

/**
 * Builds an automaton from [source] and [distance] with `makeLevFSA`, passing
 * this grammar's `terminals` as the third argument, and returns the
 * intersection of this grammar with that automaton.
 */
fun CFG.makeLevGrammar(source: List<Σᐩ>, distance: Int): CFG {
  val levFSA = makeLevFSA(source, distance, terminals)
  return intersectLevFSA(levFSA)
}

/**
 * Constructs the grammar returned by [makeLevGrammar] for [prompt] and
 * [distance], then enumerates it via `enumSeq` against a template made of
 * `prompt.size + distance` copies of the "_" token.
 */
fun CFG.barHillelRepair(prompt: List<Σᐩ>, distance: Int) =
  makeLevGrammar(prompt, distance).let { levGrammar ->
    // NOTE(review): "_" is presumably the hole marker used by the solver — confirm.
    levGrammar.enumSeq(List(prompt.size + distance) { "_" })
  }

private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG {
var clock = TimeSource.Monotonic.markNow()
val initFinal =
Expand All @@ -18,9 +23,9 @@ private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG {
val transits =
fsa.Q.map { (q, a, r) -> "[$q,$a,$r]" to listOf(a) }

fun Triple<Σᐩ, Σᐩ, Σᐩ>.isValid(nts: Triple<Σᐩ, Σᐩ, Σᐩ>): Boolean {
fun Triple<Σᐩ, Σᐩ, Σᐩ>.isCompatibleWith(nts: Triple<Σᐩ, Σᐩ, Σᐩ>): Boolean {
fun Σᐩ.coords(): Pair<Int, Int> = drop(2).run {
(length / 2).let {substring(0,it).toInt() to substring(it + 1).toInt() }
(length / 2).let { substring(0, it).toInt() to substring(it + 1).toInt() }
}

fun Pair<Int, Int>.dominates(other: Pair<Int, Int>) =
Expand All @@ -37,15 +42,15 @@ private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG {
fun IntRange.overlaps(other: IntRange) =
(other.first in first..last) || (other.last in first..last)

fun parikhBounds(nt: Σᐩ): IntRange = parikhBounds[nt] ?: -1..-1
fun lengthBounds(nt: Σᐩ): IntRange = lengthBounds[nt] ?: -1..-1

// "[$p,$A,$r] -> [$p,$B,$q] [$q,$C,$r]"
fun isCompatible() =
first.coords().dominates(second.coords())
&& second.coords().dominates(third.coords())
&& parikhBounds(nts.first).overlaps(SPLP(first, third))
&& parikhBounds(nts.second).overlaps(SPLP(first, second))
&& parikhBounds(nts.third).overlaps(SPLP(second, third))
&& lengthBounds(nts.first).overlaps(SPLP(first, third))
&& lengthBounds(nts.second).overlaps(SPLP(first, second))
&& lengthBounds(nts.third).overlaps(SPLP(second, third))

return isCompatible()
}
Expand All @@ -63,7 +68,7 @@ private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG {
triples
// CFG ∩ FSA - in general we are not allowed to do this, but it works
// because we assume a Levenshtein FSA, which is monotone and acyclic.
.filter { it.isValid(A to B to C) }
.filter { it.isCompatibleWith(A to B to C) }
.map { (p, q, r) -> "[$p,$A,$r]" to listOf("[$p,$B,$q]", "[$q,$C,$r]") }
}.flatten()

Expand Down Expand Up @@ -112,9 +117,9 @@ fun CFG.dropVestigialProductions(
// val reachable = reachableSymbols()
val rw = toMutableSet()
.apply { removeAll { prod -> prod.RHS.any { criteria(it) && it !in nts } } }
.freeze().removeUselessSymbols()//.removeUnreachable().freeze().removeNonGenerating()
.freeze().removeUselessSymbols()

println("Removed ${size - rw.size} vestigial productions.")
println("Removed ${size - rw.size} vestigial productions, resulting in ${rw.size} productions.")

return if (rw.size == size) this else rw.dropVestigialProductions(criteria)
}
Expand Down Expand Up @@ -149,21 +154,21 @@ infix fun CFG.intersect(fsa: FSA): CFG {
}

// Tracks the number of tokens a given nonterminal can represent
// e.g., a NT with a Parikh bound of 1..3 can parse { s: Σ* | |s| ∈ [1, 3] }
val CFG.parikhBounds: Map<Σᐩ, IntRange> by cache {
// e.g., a NT with a bound of 1..3 can parse { s: Σ^[1, 3] }
val CFG.lengthBounds: Map<Σᐩ, IntRange> by cache {
val clock = TimeSource.Monotonic.markNow()
val epsFree = noEpsilonOrNonterminalStubs.freeze()
val temp = List(20) { "_" }
val tpl = List(20) { "_" }
val map =
epsFree.nonterminals.associateWith { -1..-1 }.toMutableMap()
epsFree.initPForestMat(temp).seekFixpoint().diagonals
.mapIndexed { index, sets ->
val nonterminalsAtLevel = sets.flatMap { it.map { it.key } }
nonterminalsAtLevel.forEach { nt ->
map[nt]?.let {
(if (it.first < 0) (index + 1) else it.first)..(index + 1)
}?.let { map[nt] = it }
}
epsFree.initPForestMat(tpl).seekFixpoint().diagonals.mapIndexed { idx, sets ->
sets.flatMap { it.map { it.key } }.forEach { nt ->
map[nt]?.let {
(if (it.first < 0) (idx + 1) else it.first)..(idx + 1)
}?.let { map[nt] = it }
}
}

println("Computed NT length bounds in ${clock.elapsedNow()}")
map
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ val Production.RHS: List<Σᐩ> get() =
* will be slow to compute the first time, but much faster on subsequent calls.
* Storing the hashCode() in a field avoids recomputing it on every read.
*/
fun CFG.freeze(): CFG = FrozenCFG(this)
fun CFG.freeze(): CFG = if (this is FrozenCFG) this else FrozenCFG(this)
private class FrozenCFG(val cfg: CFG): CFG by cfg {
val cfgId = cfg.hashCode()
override fun equals(other: Any?) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ data class FSA(val Q: TSA, val init: Set<Σᐩ>, val final: Set<Σᐩ>) {
// println("$acc --$sym--> $nextStates")
nextStates
} intersect final).isNotEmpty()

// Renders `graph` via its own toDot, passing along the set of vertices
// whose label is a member of `final`.
fun toDot() =
  graph.vertices
    .filter { it.label in final }
    .toSet()
    .let { finalVertices -> graph.toDot(finalVertices) }
}

val TSA.states by cache { flatMap { listOf(it.π1, it.π3) }.toSet() }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ fun makeLevFSA(
(upArcs(str, dist, alphabet, digits) +
diagArcs(str, dist, alphabet, digits) +
str.mapIndexed { i, it -> rightArcs(i, dist, it, digits) }.flatten() +
str.mapIndexed { i, it -> knightArcs(i, dist, it, digits) }.flatten()).let { Q ->
str.mapIndexed { i, it -> knightArcs(i, dist, it, digits) }.flatten())
.let { Q ->
val initialStates = setOf("q_" + pd(0, digits).let { "$it/$it" })
fun Σᐩ.unpackCoordinates() =
substringAfter('_').split('/')
Expand All @@ -76,43 +77,54 @@ fun makeLevFSA(

/**
 * Formats [i] as a decimal string, left-padded with '0' until it is at least
 * [digits] characters long. Values already that wide are returned unpadded.
 */
fun pd(i: Int, digits: Int): String =
  "$i".padStart(length = digits, padChar = '0')

/*
s∈Σ i∈[0,n] j∈[1,k]
-----------------------
(q_i,j−1 -s→ q_i,j)∈δ
*/

fun upArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>, digits: Int): TSA =
((0..<str.size+dist).toSet() * (1..dist).toSet() * alphabet)
((0..<str.size + dist).toSet() * (1..dist).toSet() * alphabet)
.filter { (i, _, s) -> str.size <= i || str[i] != s }
.filter { (i, j, _) -> i <= str.size || i - str.size < j }
.map { (i, j, s) -> i to j-1 to s to i to j }
.map { (a, b, s, d, e) ->
pd(a, digits) to pd(b, digits) to s to pd(d, digits) to pd(e, digits)
}.map { (a, b, s, d, e) ->
"q_$a/$b" to s to "q_$d/$e"
}.toSet()
.map { (i, j, s) -> i to j - 1 to s to i to j }.postProc(digits)

/*
s∈Σ i∈[1,n] j ∈[1,k]
-------------------------
(q_i−1,j−1 -s→ q_i,j)∈δ
*/

fun diagArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>, digits: Int): TSA =
((1..<str.size+dist).toSet() * (1..dist).toSet() * alphabet)
.filter { (i, _, s) -> str.size <= i - 1 || str[i-1] != s }
((1..<str.size + dist).toSet() * (1..dist).toSet() * alphabet)
.filter { (i, _, s) -> str.size <= i - 1 || str[i - 1] != s }
.filter { (i, j, _) -> i <= str.size || i - str.size <= j }
.map { (i, j, s) -> i-1 to j-1 to s to i to j }
.map { (a, b, s, d, e) ->
pd(a, digits) to pd(b, digits) to s to pd(d, digits) to pd(e, digits)
}.map { (a, b, s, d, e) ->
"q_$a/$b" to s to "q_$d/$e"
}.toSet()
.map { (i, j, s) -> i - 1 to j - 1 to s to i to j }.postProc(digits)

/*
s=σ_i i∈[1,n] j∈[0,k]
-----------------------
(q_i−1,j -s→ q_i,j)∈δ
*/

fun rightArcs(idx: Int, dist: Int, letter: Σᐩ, digits: Int): TSA =
(setOf(idx + 1) * (0..dist).toSet() * setOf(letter))
.map { (i, j, s) -> i-1 to j to s to i to j }
.map { (a, b, s, d, e) ->
pd(a, digits) to pd(b, digits) to s to pd(d, digits) to pd(e, digits)
}.map { (a, b, s, d, e) ->
"q_$a/$b" to s to "q_$d/$e"
}.toSet()
.map { (i, j, s) -> i - 1 to j to s to i to j }.postProc(digits)

/*
s=σ_i i∈[2,n] j∈[1,k]
-------------------------
(q_i−2,j−1 -s→ q_i,j)∈δ
*/

fun knightArcs(idx: Int, dist: Int, letter: Σᐩ, digits: Int): TSA =
if (idx <= 1) setOf()
if (idx < 1) setOf()
else (setOf(idx + 1) * (1..dist).toSet() * setOf(letter))
.map { (i, j, s) -> i-2 to j-1 to s to i to j }
.map { (a, b, s, d, e) ->
pd(a, digits) to pd(b, digits) to s to pd(d, digits) to pd(e, digits)
}.map { (a, b, s, d, e) ->
"q_$a/$b" to s to "q_$d/$e"
}.toSet()
.map { (i, j, s) -> i - 2 to j - 1 to s to i to j }.postProc(digits)

/**
 * Converts integer arc descriptions `(a, b, s, d, e)` into labelled
 * transitions: each coordinate is zero-padded to [digits] characters with
 * [pd], and the result is the triple `"q_a/b" to s to "q_d/e"`. Duplicates
 * are removed by collecting into a set.
 *
 * NOTE(review): the receiver type was garbled in this view (unbalanced angle
 * brackets); `List<Π5<…>>` is reconstructed from the five-way destructuring —
 * confirm the tuple alias name against the types package.
 */
fun List<Π5<Int, Int, Σᐩ, Int, Int>>.postProc(digits: Int) =
  map { (a, b, s, d, e) ->
    // Single pass: pad and format the source/target state names together.
    "q_${pd(a, digits)}/${pd(b, digits)}" to s to "q_${pd(d, digits)}/${pd(e, digits)}"
  }.toSet()
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class PTree(val root: String = "ε.", val branches: List<Π2A<PTree>> = listOf()
val (lb, rb) = shuffledBranches[remainder.toString().toInt()]
val (l, quotient2) = lb.decodeTree(quotient1)
val (r, quotient3) = rb.decodeTree(quotient2)
val concat = Tree(l.root, children = arrayOf(l, r))
val concat = Tree(root, children = arrayOf(l, r))
return concat to quotient3
}

Expand All @@ -95,7 +95,7 @@ class PTree(val root: String = "ε.", val branches: List<Π2A<PTree>> = listOf()
fun sampleStrWithoutReplacement(): Sequence<String> = sequence {
println("Total trees in PTree: $totalTrees")
var i = BigInteger.ZERO
while (i < totalTrees) yield(decodeString(i++).first)
while (i < 3 * totalTrees) yield(decodeString(i++).first)
}.distinct()

// Samples instantaneously from the parse forest, but may return duplicates
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

package ai.hypergraph.kaliningraph.parsing

import ai.hypergraph.kaliningraph.*
import ai.hypergraph.kaliningraph.sampling.*
import ai.hypergraph.kaliningraph.splitProd
import ai.hypergraph.kaliningraph.tensor.*
import ai.hypergraph.kaliningraph.types.*

Expand Down Expand Up @@ -302,7 +302,7 @@ fun List<Σᐩ>.solve(
): Sequence<Σᐩ> =
genCandidates(CFG, fillers)
// .also { println("Solving (Complexity: ${fillers.size.pow(count { it == "_" })}): ${joinToString(" ")}") }
.takeWhile { takeMoreWhile() }.filter { it.matches(CFG) }
.takeWhile { takeMoreWhile() }.filter { it.matches(CFG) }.map { it.removeEpsilon() }

fun List<Σᐩ>.genCandidates(CFG: CFG, fillers: Set<Σᐩ> = CFG.terminals): Sequence<Σᐩ> =
MDSamplerWithoutReplacement(fillers, count { it == HOLE_MARKER }).map {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ data class Choice(val tokens: List<Σᐩ>, val weight: Float): Comparable<Choice
constructor(token: Σᐩ): this(listOf(token), if ("ε" in token) 0f else 1f)

companion object {
val comparator: Comparator<Choice> =
compareBy<Choice> { it.weight }.thenBy { it.sanitized.size }.thenBy { it.asString }
val comparator: Comparator<Choice> = compareBy<Choice> { it.weight }
.thenBy { it.sanitized.size }.thenBy { it.asString }
}

override fun compareTo(other: Choice): Int = comparator.compare(this, other)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@ class Tree constructor(
}
) { i, acc, it -> acc + it.toGraph("$j.$i") }

val indxInfo by lazy { if (span.first < Int.MAX_VALUE) " [${span.first}]" else "" }
val spanInfo by lazy { if (span.first < Int.MAX_VALUE) " [$span]" else "" }

fun prettyPrint(buffer: Σᐩ = "", prefix: Σᐩ = "", nextPrefix: Σᐩ = ""): Σᐩ =
if (children.isEmpty()) (buffer + prefix + "${terminal?.htmlify()} [${span.first}]\n")
if (children.isEmpty()) (buffer + prefix + "${terminal?.htmlify()}$indxInfo\n")
else children.foldIndexed("$buffer$prefix" + root.htmlify() +
(if (-1 !in span) " [$span]" else "") + "\n") { i: Int, acc: Σᐩ, it: Tree ->
(if (-1 !in span) spanInfo else "") + "\n") { i: Int, acc: Σᐩ, it: Tree ->
if (i == children.size - 1)
it.prettyPrint(acc + "", "$nextPrefix└── ", "$nextPrefix ")
else it.prettyPrint(acc, "$nextPrefix├── ", "$nextPrefix")
Expand Down
26 changes: 16 additions & 10 deletions src/commonMain/kotlin/ai/hypergraph/kaliningraph/types/Graph.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ai.hypergraph.kaliningraph.types
import ai.hypergraph.kaliningraph.*
import ai.hypergraph.kaliningraph.cache.LRUCache
import ai.hypergraph.kaliningraph.graphs.*
import ai.hypergraph.kaliningraph.parsing.Σᐩ
import ai.hypergraph.kaliningraph.tensor.*
import ai.hypergraph.kaliningraph.theory.wl
import kotlin.js.JsName
Expand Down Expand Up @@ -147,15 +148,15 @@ interface IGraph<G, E, V>: IGF<G, E, V>, Set<V>, Encodable
fun asString() =
edgList.map { "${it.first} -> ${it.second.target}" }.formatAsGrid().toString()

fun toDot(): String {
fun toDot(highlight: Set<V> = setOf()): String {
fun String.htmlify() =
replace("<", "&lt;").replace(">", "&gt;")
return """
strict digraph {
graph ["concentrate"="true","rankdir"="LR","bgcolor"="transparent","margin"="0.0","compound"="true","nslimit"="20"]
${
vertices.joinToString("\n") {
""""${it.id.htmlify()}" ["color"="black","fontcolor"="black","fontname"="JetBrains Mono","fontsize"="15","penwidth"="2.0","shape"="Mrecord"]""" }
""""${it.id.htmlify()}" ["color"="black","fontcolor"="black","fontname"="JetBrains Mono","fontsize"="15","penwidth"="2.0","shape"="Mrecord"${if(it in highlight)""","fillcolor"="lightgray","style"="filled"""" else ""}]""" }
}
${edgList.joinToString("\n") { (v, e) ->
""""${v.id.htmlify()}" -> "${e.target.id.htmlify()}" ["color"="${ if (v is LGVertex && v.occupied) "red" else "black" }","arrowhead"="normal","penwidth"="2.0","label"="${(e as? LabeledEdge)?.label ?: ""}"]""" }
Expand Down Expand Up @@ -188,18 +189,23 @@ val <G: IGraph<G, E, V>, E: IEdge<G, E, V>, V: IVertex<G, E, V>> IGraph<G, E, V>
// All pairs shortest path
val <G: IGraph<G, E, V>, E: IEdge<G, E, V>, V: IVertex<G, E, V>> IGraph<G, E, V>.APSP: Map<Pair<V, V>, Int> by cache {
val dist = mutableMapOf<Pair<V, V>, Int>()
for (v in vertices) for (w in vertices) dist[v to w] = if (v == w) 0 else Int.MAX_VALUE
for (e in edges) dist[e.source to e.target] = 1
for (k in vertices) for (i in vertices) for (j in vertices) {
val ik = dist[i to k]!!
val kj = dist[k to j]!!
val ij = dist[i to j]!!
if (ik != Int.MAX_VALUE && kj != Int.MAX_VALUE && ik + kj < ij) dist[i to j] = ik + kj
for ((u, v) in vertices * vertices) {
dist[v to u] = if (v == u) 0 else Int.MAX_VALUE
}
for (e in adjList) { dist[e.first to e.second] = 1 }
while (true) {
var done = true
for ((k, i, j) in vertices * vertices * vertices) {
if (dist[i to k]!! < Int.MAX_VALUE && dist[k to j]!! < Int.MAX_VALUE) {
val newDist = dist[i to k]!! + dist[k to j]!!
if (newDist < dist[i to j]!!) { dist[i to j] = newDist; done = false }
}
}
if (done) break
}
dist
}


val <G: IGraph<G, E, V>, E: IEdge<G, E, V>, V: IVertex<G, E, V>> IGraph<G, E, V>.degMap: Map<V, Int> by cache { vertices.associateWith { it.neighbors.size } }
val <G: IGraph<G, E, V>, E: IEdge<G, E, V>, V: IVertex<G, E, V>> IGraph<G, E, V>.edges: Set<E> by cache { edgMap.values.flatten().toSet() }
val <G: IGraph<G, E, V>, E: IEdge<G, E, V>, V: IVertex<G, E, V>> IGraph<G, E, V>.edgList: List<Π2<V, E>> by cache { vertices.flatMap { s -> s.outgoing.map { s to it } } }
Expand Down
Loading

0 comments on commit 9e1c4ac

Please sign in to comment.