Skip to content

Commit

Permalink
Levenshtein FSA works, Bar-Hillel still has issues
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Oct 8, 2023
1 parent a02b0ca commit c20e49e
Show file tree
Hide file tree
Showing 10 changed files with 48 additions and 33 deletions.
2 changes: 1 addition & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import org.jetbrains.kotlin.gradle.targets.js.nodejs.*
plugins {
signing
`maven-publish`
kotlin("multiplatform") version "1.9.20-Beta"
kotlin("multiplatform") version "1.9.20-Beta2"
// kotlin("jupyter.api") version "0.11.0-225"
id("com.github.ben-manes.versions") version "0.48.0"
id("io.github.gradle-nexus.publish-plugin") version "2.0.0-rc-1"
Expand Down
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.3-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip
networkTimeout=10000
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import ai.hypergraph.kaliningraph.types.*

/**
 * Intersects this automaton with [cfg].
 *
 * Pure convenience: the operands are swapped and the call is forwarded to
 * the `CFG.intersect(FSA)` overload, so both argument orders give the same grammar.
 */
infix fun FSA.intersect(cfg: CFG): CFG = cfg intersect this
// http://www.cs.umd.edu/~gasarch/BLOGPAPERS/cfg.pdf
// https://browse.arxiv.org/pdf/2209.06809.pdf#page=5

infix fun CFG.intersect(fsa: FSA): CFG {
val initFinal =
Expand All @@ -24,11 +25,14 @@ infix fun CFG.intersect(fsa: FSA): CFG {
// For every production A → σ in P, for every (p, σ, q) ∈ Q × Σ × Q
// such that δ(p, σ) = q we have the production [p, A, q] → σ in P′.
val unitProds =
unitProductions.map { (lhs, rhs) ->
val relevantTransits = fsa.Q.filter { it.π1 == rhs[0] }
relevantTransits.map { (q, a, r) -> "[$q,$lhs,$r] -> $a" }
unitProductions.map { (A, rhs) ->
val relevantTransits = fsa.Q.filter { it.π2 == rhs[0] }
relevantTransits.map { (p, σ, q) -> "[$p,$A,$q] -> $σ" }
}.flatten()

return (initFinal + transits + binaryProds + unitProds).joinToString("\n")
.parseCFG(normalize = false).removeVestigalProductions()
.parseCFG(normalize = false)
// .also { println(it.pretty) }
// .removeVestigalProductions()
// .also { println(it.size) }
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ val CFG.bindex: Bindex<Σᐩ> by cache { Bindex(nonterminals) }
/**
 * Result of calling `normalize()` on this grammar, exposed through the `cache` delegate.
 * NOTE(review): presumably computed once per receiver and reused — confirm against `cache`.
 */
val CFG.normalForm: CFG by cache { normalize() }
/**
 * Result of calling `dependencyGraph()` on this grammar, exposed through the `cache` delegate.
 * NOTE(review): presumably computed once per receiver and reused — confirm against `cache`.
 */
val CFG.graph: LabeledGraph by cache { dependencyGraph() }

val CFG.originalForm: CFG by cache { rewriteHistory[this]!![0] }
val CFG.originalForm: CFG by cache { rewriteHistory[this]?.get(0) ?: this }
/**
 * Element at index 1 of the `rewriteHistory` entry recorded for this grammar.
 *
 * NOTE(review): the `!!` throws when `rewriteHistory` holds no entry for this grammar,
 * e.g. presumably for a grammar that was never registered there — confirm that every
 * caller only reads this on grammars with a recorded history.
 */
val CFG.nonparametricForm: CFG by cache { rewriteHistory[this]!![1] }
//val CFG.originalForm by cache { rewriteHistory[this]!![0] }
//val CFG.nonparametricForm by cache { rewriteHistory[this]!![1] }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@ data class FSA(val Q: TSA, val init: Set<Σᐩ>, val final: Set<Σᐩ>) {
val map: Map<Π2A<Σᐩ>, Set<Σᐩ>> by lazy {
Q.groupBy({ (a, b, _) -> a to b }, { (_, _, c) -> c })
.mapValues { (_, v) -> v.toSet() }
// .also { it.map { println("${it.key}=${it.value.joinToString(",", "[", "]"){if(it in init) "$it*" else if (it in final) "$it@" else it}}") } }
}

val graph by lazy {
val graph: LabeledGraph by lazy {
LabeledGraph { Q.forEach { (a, b, c) -> a[b] = c } }
}

fun recognizes(str: Σᐩ) =
(str.tokenizeByWhitespace().fold(init) { acc, sym ->
val nextStates = acc.flatMap { map[it to sym] ?: emptySet() }.toSet()
println("$acc --$sym--> $nextStates")
// println("$acc --$sym--> $nextStates")
nextStates
} intersect final).isNotEmpty()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ fun makeLevFSA(str: Σᐩ, dist: Int, alphabet: Set<Σᐩ>): FSA =
makeLevFSA(str.tokenizeByWhitespace(), dist, alphabet)

fun makeLevFSA(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>): FSA =
(upArcs(str.size, dist, alphabet) +
diagArcs(str.size, dist, alphabet) +
str.map { rightArcs(str.size, dist, "ε") }.flatten() +
str.map { knightArcs(str.size, dist, "ε") }.flatten()).let { Q ->
val initialStates = setOf("q_0,0")
(upArcs(str, dist, alphabet) +
diagArcs(str, dist, alphabet) +
str.mapIndexed { i, it -> rightArcs(i, dist, it) }.flatten() +
str.mapIndexed { i, it -> knightArcs(i, dist, it) }.flatten()).let { Q ->
val initialStates = setOf("q_0/0")
fun Σᐩ.unpackCoordinates() =
substringAfter("_").split("/")
.let { (i, j) -> i.toInt() to j.toInt() }
Expand All @@ -69,18 +69,23 @@ fun makeLevFSA(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>): FSA =
FSA(Q, initialStates, finalStates)
}

fun upArcs(len: Int, dist: Int, alphabet: Set<Σᐩ>): TSA =
((0..len).toSet() * (1..dist).toSet() * alphabet)
fun upArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>): TSA =
((0..<str.size+dist).toSet() * (1..dist).toSet() * alphabet)
.filter { (i, _, s) -> str.size <= i || str[i] != s }
.filter { (i, j, _) -> i <= str.size || i - str.size < j }
.map { (i, j, s) -> "q_$i/${j-1}" to s to "q_$i/$j" }.toSet()

fun diagArcs(len: Int, dist: Int, alphabet: Set<Σᐩ>): TSA =
((1..len).toSet() * (1..dist).toSet() * alphabet)
fun diagArcs(str: List<Σᐩ>, dist: Int, alphabet: Set<Σᐩ>): TSA =
((1..<str.size+dist).toSet() * (1..dist).toSet() * alphabet)
.filter { (i, _, s) -> str.size <= i - 1 || str[i-1] != s }
.filter { (i, j, _) -> i <= str.size || i - str.size <= j }
.map { (i, j, s) -> "q_${i-1}/${j-1}" to s to "q_$i/$j" }.toSet()

fun rightArcs(len: Int, dist: Int, letter: Σᐩ): TSA =
((1..len).toSet() * (0..dist).toSet() * setOf(letter))
fun rightArcs(idx: Int, dist: Int, letter: Σᐩ): TSA =
(setOf(idx + 1) * (0..dist).toSet() * setOf(letter))
.map { (i, j, s) -> "q_${i-1}/$j" to s to "q_$i/$j" }.toSet()

fun knightArcs(len: Int, dist: Int, letter: Σᐩ): TSA =
((2..len).toSet() * (1..dist).toSet() * setOf(letter))
fun knightArcs(idx: Int, dist: Int, letter: Σᐩ): TSA =
if (idx <= 1) setOf()
else (setOf(idx + 1) * (1..dist).toSet() * setOf(letter))
.map { (i, j, s) -> "q_${i-2}/${j-1}" to s to "q_$i/$j" }.toSet()
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fun CFG.removeVestigalProductions(
): CFG {
val rw =
filter { it.RHS.all { !criteria(it) || it in nonterminals } }
.toSet()
.toSet().removeUselessSymbols()

// println("Removed ${size - rw.size} vestigal productions.")

Expand All @@ -46,6 +46,7 @@ fun CFG.normalize(): CFG =
// Must remember to run the unit test if order changes in the future
// ./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.sat.SATValiantTest.testTLArithmetic"
.generateNonterminalStubs()
// Should only need to run this on synthetic CFGs
.removeVestigalProductions()
.also { cnf -> rewriteHistory.put(cnf.freeze(), rewrites) }
}
Expand Down Expand Up @@ -176,7 +177,7 @@ fun CFG.refactorEpsilonProds(nlbls: Set<Σᐩ> = nullableNonterminals()): CFG =
* A useful symbol is both generating and reachable.
*/

private fun CFG.removeUselessSymbols(
fun CFG.removeUselessSymbols(
generating: Set<Σᐩ> = generatingSymbols(),
reachable: Set<Σᐩ> = reachableSymbols()
): CFG = partition { (s, _) -> s in generating intersect reachable }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,5 +309,4 @@ fun CFG.handleSingleton(s: Σᐩ): Set<Σᐩ> =
else if (s.matches(Regex("<.+>")))
bimap[s.substring(1, s.length - 1)]
.mapNotNull { if (it.size == 1) it[0] else null }.toSet()
else setOf()

else setOf()
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package ai.hypergraph.kaliningraph.types

import ai.hypergraph.kaliningraph.*
import ai.hypergraph.kaliningraph.cache.LRUCache
import ai.hypergraph.kaliningraph.graphs.LGVertex
import ai.hypergraph.kaliningraph.graphs.*
import ai.hypergraph.kaliningraph.tensor.*
import ai.hypergraph.kaliningraph.theory.wl
import kotlin.js.JsName
Expand Down Expand Up @@ -157,7 +157,7 @@ interface IGraph<G, E, V>: IGF<G, E, V>, Set<V>, Encodable
""""${it.id.htmlify()}" ["color"="black","fontcolor"="black","fontname"="JetBrains Mono","fontsize"="15","penwidth"="2.0","shape"="Mrecord"]""" }
}
${edgList.joinToString("\n") { (v, e) ->
""""${v.id.htmlify()}" -> "${e.target.id.htmlify()}" ["color"="${ if (v is LGVertex && v.occupied) "red" else "black" }","arrowhead"="normal","penwidth"="2.0","label"=""]""" }
""""${v.id.htmlify()}" -> "${e.target.id.htmlify()}" ["color"="${ if (v is LGVertex && v.occupied) "red" else "black" }","arrowhead"="normal","penwidth"="2.0","label"="${(e as? LabeledEdge)?.label ?: ""}"]""" }
}
}
""".trimIndent()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,16 +223,21 @@ class BarHillelTest {
val simpleCFG = """
START -> E
O -> + | *
E -> N | E O N
E -> N O N
N -> 1 | 2
""".parseCFG().noEpsilonOrNonterminalStubs

val levFSA = makeLevFSA("1 + 2 + 1", 2, simpleCFG.terminals)
val levFSA = makeLevFSA("1 + 1", 1, simpleCFG.terminals)
// println(levFSA.graph.toDot())

println(levFSA.Q.size)
// println(levFSA.Q.size)

// val levCFG = (levFSA.intersect(simpleCFG)).also { println(it.pretty) }
val levCFG = (levFSA.intersect(simpleCFG))//.also { print(it.pretty) }
// println(levCFG.graph.toDot())

println(levFSA.recognizes("1 + 1 + 1"))
val testStr = "1 * 1"
assertTrue(levFSA.recognizes(testStr))
assertTrue(testStr in simpleCFG.language)
// println(levCFG.corner(testStr))
}
}

0 comments on commit c20e49e

Please sign in to comment.