diff --git a/internal/gaby/main.go b/internal/gaby/main.go index 386214c..a5fd812 100644 --- a/internal/gaby/main.go +++ b/internal/gaby/main.go @@ -174,7 +174,7 @@ func main() { g.embed = ai g.llm = ai g.llmapp = llmapp.NewWithChecker(g.slog, ai, g.policy, g.db) - g.overview = overview.New(g.llmapp, g.github) + g.overview = overview.New(g.slog, g.db, g.github, g.llmapp, "overview", "gabyhelp") cr := crawl.New(g.slog, g.db, g.http) cr.Add("https://go.dev/") diff --git a/internal/gaby/overview_test.go b/internal/gaby/overview_test.go index ef56ec6..63fe999 100644 --- a/internal/gaby/overview_test.go +++ b/internal/gaby/overview_test.go @@ -37,7 +37,7 @@ func TestPopulateOverviewPage(t *testing.T) { vector: storage.MemVectorDB(db, lg, "vector"), github: github.New(lg, db, secret.Empty(), nil), llmapp: lc, - overview: overview.New(lc, gh), + overview: overview.New(lg, db, gh, lc, "test", "test-bot"), docs: docs.New(lg, db), embed: llm.QuoteEmbedder(), } @@ -118,6 +118,7 @@ func TestPopulateOverviewPage(t *testing.T) { Raw: wantIssueResult.Overview, Typed: &overview.IssueResult{ TotalComments: 2, + LastComment: comment2.CommentID(), Overview: wantIssueResult.Overview, }, Issue: iss1, @@ -143,6 +144,7 @@ func TestPopulateOverviewPage(t *testing.T) { Raw: wantIssueResult.Overview, Typed: &overview.IssueResult{ TotalComments: 2, + LastComment: comment2.CommentID(), Overview: wantIssueResult.Overview, }, Issue: iss1, @@ -195,6 +197,7 @@ func TestPopulateOverviewPage(t *testing.T) { Typed: &overview.IssueUpdateResult{ NewComments: 1, TotalComments: 2, + LastComment: comment2.CommentID(), Overview: wantUpdateResult.Overview, }, Issue: iss1, diff --git a/internal/github/llm.go b/internal/github/llm.go index c82eaf2..ae15b35 100644 --- a/internal/github/llm.go +++ b/internal/github/llm.go @@ -14,7 +14,7 @@ func (i *Issue) ToLLMDoc() *llmapp.Doc { return &llmapp.Doc{ Type: "issue", URL: i.HTMLURL, - Author: i.User.Login, + Author: i.User.ForDisplay(), Title: i.Title, Text: i.Body, } @@ -26,8 +26,16 @@ func (ic *IssueComment) ToLLMDoc() *llmapp.Doc { return &llmapp.Doc{ Type: "issue comment", URL: ic.HTMLURL, - Author: ic.User.Login, + Author: ic.User.ForDisplay(), // no title Text: ic.Body, } } + +// ForDisplay returns the user's login username, prefixed with @. +func (u *User) ForDisplay() string { + if u.Login == "" { + return "" + } + return "@" + u.Login +} diff --git a/internal/overview/client.go b/internal/overview/client.go index aca8577..c0dddd2 100644 --- a/internal/overview/client.go +++ b/internal/overview/client.go @@ -4,23 +4,148 @@ // Package overview generates and posts overviews of discussions. // For now, it only works with GitHub issues and their comments. -// TODO(tatianabradley): Implement posting logic. +// TODO(tatianabradley): Add comment explaining design. package overview import ( + "context" + "encoding/json" + "log/slog" + "time" + "golang.org/x/oscar/internal/github" "golang.org/x/oscar/internal/llmapp" + "golang.org/x/oscar/internal/storage" + "rsc.io/ordered" ) +// A Client is used to generate, post, and update AI-generated overviews +// of GitHub issues and their comments. type Client struct { - lc *llmapp.Client - gh *github.Client + slog *slog.Logger + db storage.DB // the database to use to store state + minTimeBetweenUpdates time.Duration // the minimum time between calls to [poster.run] + + g *generator // for generating overviews + p *poster // for modifying GitHub +} + +// New returns a new Client used to generate and post overviews to GitHub. +// Name is a string used to identify the Client, and bot is the login of the +// GitHub user that will modify GitHub. +// Clients with the same name and bot use the same state. +func New(lg *slog.Logger, db storage.DB, gh *github.Client, lc *llmapp.Client, name, bot string) *Client { + c := &Client{ + slog: lg, + db: db, + minTimeBetweenUpdates: defaultMinTimeBetweenUpdates, + g: newGenerator(gh, lc), + p: newPoster(name, bot), + } + c.g.skipCommentsBy(bot) + return c +} + +var defaultMinTimeBetweenUpdates = 24 * time.Hour + +// Run posts and updates AI-generated overviews of GitHub issues. +// +// TODO(tatianabradley): Detailed comment. +func (c *Client) Run(ctx context.Context) error { + c.slog.Info("overview.Run start") + defer func() { + c.slog.Info("overview.Run end") + }() + + // Check if we should run or not. + c.db.Lock(string(c.runKey())) + defer c.db.Unlock(string(c.runKey())) + + lr, err := c.lastRun() + if err != nil { + return err + } + if time.Since(lr) < c.minTimeBetweenUpdates { + c.slog.Info("overview.Run: skipped (last successful run happened too recently)", "last run", lr, "min time", c.minTimeBetweenUpdates) + return nil + } + if err := c.p.run(); err != nil { + return err + } + + c.setLastRun(time.Now()) + return nil +} + +// ForIssue returns an LLM-generated overview of the issue and its comments. +// It does not make any requests to, or modify, GitHub; the issue and comment data must already +// be stored in the database. +func (c *Client) ForIssue(ctx context.Context, iss *github.Issue) (*IssueResult, error) { + return c.g.issue(ctx, iss) +} + +// ForIssueUpdate returns an LLM-generated overview of the issue and its +// comments, separating the comments into "old" and "new" groups broken +// by the specifed lastRead comment id. (The lastRead comment itself is +// considered "old", and must be present in the database). +// +// ForIssueUpdate does not make any requests to, or modify, GitHub; the issue and comment data must already +// be stored in db. +func (c *Client) ForIssueUpdate(ctx context.Context, iss *github.Issue, lastRead int64) (*IssueUpdateResult, error) { + return c.g.issueUpdate(ctx, iss, lastRead) +} + +// EnableProject enables the Client to post on and update issues in the given +// GitHub project (for example "golang/go"). +func (c *Client) EnableProject(project string) { + c.p.projects[project] = true +} + +// RequireApproval configures the poster to require approval for all actions. +func (c *Client) RequireApproval() { + c.p.requireApproval = true +} + +// AutoApprove configures the Client to auto-approve all its actions. +func (c *Client) AutoApprove() { + c.p.requireApproval = false +} + +// SetMinComments sets the minimum number of comments a post needs to get an +// overview comment. +func (c *Client) SetMinComments(n int) { + c.p.minComments = n } -// New returns a new Client used to generate and post overviews. -func New(lc *llmapp.Client, gh *github.Client) *Client { - return &Client{ - lc: lc, - gh: gh, +type state struct { + LastRun string // the time of the last sucessful (non-skipped) call to [Client.Run] +} + +// lastRun returns the time of the last successful (non-skipped) +// call to [Client.Run]. +func (c *Client) lastRun() (time.Time, error) { + b, ok := c.db.Get(c.runKey()) + if !ok { + return time.Time{}, nil } + var s state + if err := json.Unmarshal(b, &s); err != nil { + return time.Time{}, err + } + return time.Parse(time.RFC3339, s.LastRun) +} + +// setLastRun sets the time of the last successful (non-skipped) +// call to [Client.Run]. +func (c *Client) setLastRun(t time.Time) { + c.db.Set(c.runKey(), storage.JSON(&state{ + LastRun: t.Format(time.RFC3339), + })) +} + +// runKey returns the key to use to store the state for this Client. +func (c *Client) runKey() []byte { + return ordered.Encode(runKind, c.p.name, c.p.bot) } + +const runKind = "overview.Run" diff --git a/internal/overview/issue.go b/internal/overview/issue.go index 7fcfead..987670f 100644 --- a/internal/overview/issue.go +++ b/internal/overview/issue.go @@ -12,71 +12,146 @@ import ( "golang.org/x/oscar/internal/llmapp" ) +type generator struct { + gh *github.Client + lc *llmapp.Client + ignores []func(*github.IssueComment) bool // ignore these comments when generating overviews +} + +func newGenerator(gh *github.Client, lc *llmapp.Client) *generator { + return &generator{ + gh: gh, + lc: lc, + } +} + +// skipCommentsBy configures the generator to ignore comments posted +// by the given GitHub user when generating issue overviews. +func (g *generator) skipCommentsBy(login string) { + g.ignores = append(g.ignores, func(ic *github.IssueComment) bool { + return ic.User.Login == login + }) +} + // IssueResult is the result of [Client.ForIssue]. // It contains the generated overview and metadata about the issue. type IssueResult struct { - TotalComments int // number of comments for this issue - Overview *llmapp.Result // the LLM-generated issue and comment summary + TotalComments int // total number of comments for this issue + LastComment int64 // ID of the highest-numbered comment present for this issue + SkippedComments int // number of comments not included in the summary + Overview *llmapp.Result // the LLM-generated issue and comment summary } -// ForIssue returns an LLM-generated overview of the issue and its comments. -// It does not make any requests to or modify, GitHub; the issue and comment data must already -// be stored in the database. -func (c *Client) ForIssue(ctx context.Context, iss *github.Issue) (*IssueResult, error) { +// See comment on [Client.ForIssue]. +func (g *generator) issue(ctx context.Context, iss *github.Issue) (*IssueResult, error) { post := iss.ToLLMDoc() - var cs []*llmapp.Doc - for c := range c.gh.Comments(iss) { - cs = append(cs, c.ToLLMDoc()) + var cds []*llmapp.Doc + m := g.newIssueMeta() + for ic := range g.gh.Comments(iss) { + if m.add(ic) { + continue + } + cds = append(cds, ic.ToLLMDoc()) } - overview, err := c.lc.PostOverview(ctx, post, cs) + overview, err := g.lc.PostOverview(ctx, post, cds) if err != nil { return nil, err } return &IssueResult{ - TotalComments: len(cs), - Overview: overview, + TotalComments: m.TotalComments, + SkippedComments: m.SkippedComments, + LastComment: m.LastComment, + Overview: overview, }, nil } +// ignore reports whether the given issue comment should be ignored +// when generating issue overviews. +func (g *generator) ignore(ic *github.IssueComment) bool { + for _, ig := range g.ignores { + if ig(ic) { + return true + } + } + return false +} + // IssueUpdateResult is the result of [Client.ForIssueUpdate]. // It contains the generated overview and metadata about the issue. type IssueUpdateResult struct { - NewComments int // number of new comments for this issue - TotalComments int // number of comments for this issue - Overview *llmapp.Result // the LLM-generated issue and comment summary + TotalComments int + LastComment int64 + SkippedComments int + + NewComments int // number of new comments used in the summary + Overview *llmapp.Result // the LLM-generated issue and comment summary } -// ForIssueUpdate returns an LLM-generated overview of the issue and its -// comments, separating the comments into "old" and "new" groups broken -// by the specifed lastRead comment id. (The lastRead comment itself is -// considered "old", and must be present in the database). -// ForIssueUpdate does not make any requests to GitHub; the issue and comment data must already -// be stored in db. -func (c *Client) ForIssueUpdate(ctx context.Context, iss *github.Issue, lastRead int64) (*IssueUpdateResult, error) { +// See comment on [Client.ForIssueUpdate]. +func (g *generator) issueUpdate(ctx context.Context, iss *github.Issue, lastRead int64) (*IssueUpdateResult, error) { post := iss.ToLLMDoc() var oldComments, newComments []*llmapp.Doc foundTarget := false - for c := range c.gh.Comments(iss) { + m := g.newIssueMeta() + for ic := range g.gh.Comments(iss) { + if ignore := m.add(ic); ignore { + continue + } // New comment. - if c.CommentID() > lastRead { - newComments = append(newComments, c.ToLLMDoc()) + if ic.CommentID() > lastRead { + newComments = append(newComments, ic.ToLLMDoc()) continue } - if c.CommentID() == lastRead { + if ic.CommentID() == lastRead { foundTarget = true } - oldComments = append(oldComments, c.ToLLMDoc()) + oldComments = append(oldComments, ic.ToLLMDoc()) } if !foundTarget { return nil, fmt.Errorf("issue %d comment %d not found in database", iss.Number, lastRead) } - overview, err := c.lc.UpdatedPostOverview(ctx, post, oldComments, newComments) + overview, err := g.lc.UpdatedPostOverview(ctx, post, oldComments, newComments) if err != nil { return nil, err } return &IssueUpdateResult{ - NewComments: len(newComments), - TotalComments: len(oldComments) + len(newComments), - Overview: overview, + NewComments: len(newComments), + TotalComments: m.TotalComments, + SkippedComments: m.SkippedComments, + LastComment: m.LastComment, + Overview: overview, }, nil } + +// issueMeta contains metadata about a [github.Issue] that can't be +// determined from the issue itself. +type issueMeta struct { + TotalComments int // total number of comments for this issue + LastComment int64 // ID of the highest-numbered comment present for this issue + SkippedComments int // number of ignored comments (by ignore) + + // comments to ignore (must be set before any calls to [issueMeta.add]) + ignore func(*github.IssueComment) bool +} + +// newIssueMeta creates a new issueMeta with the same ignore +// function as the generator. +func (g *generator) newIssueMeta() *issueMeta { + return &issueMeta{ + ignore: g.ignore, + } +} + +// add updates the issueMeta to include the given comment, +// and reports whether the comment should be ignored. +func (i *issueMeta) add(ic *github.IssueComment) (ignore bool) { + i.TotalComments++ + if ic.CommentID() > i.LastComment { + i.LastComment = ic.CommentID() + } + if i.ignore != nil && i.ignore(ic) { + i.SkippedComments++ + return true + } + return false +} diff --git a/internal/overview/issue_test.go b/internal/overview/issue_test.go index c2649a6..7406d10 100644 --- a/internal/overview/issue_test.go +++ b/internal/overview/issue_test.go @@ -38,7 +38,7 @@ func TestIssue(t *testing.T) { check(gh.Sync(ctx)) lc := llmapp.New(lg, llm.EchoContentGenerator(), db) - c := New(lc, gh) + c := New(lg, db, gh, lc, "test-name", "test-bot") issue := &github.Issue{ URL: "https://api.github.com/repos/robpike/ivy/issues/19", @@ -70,7 +70,7 @@ could you get me a hand! &llmapp.Doc{ Type: "issue", URL: "https://github.com/robpike/ivy/issues/19", - Author: "xunshicheng", + Author: "@xunshicheng", Title: issue.Title, Text: issue.Body, }, @@ -78,7 +78,7 @@ could you get me a hand! { Type: "issue comment", URL: "https://github.com/robpike/ivy/issues/19#issuecomment-169157303", - Author: "robpike", + Author: "@robpike", Text: `See the import comment, or listen to the error message. Ivy uses a custom import. go get robpike.io/ivy @@ -93,6 +93,7 @@ It is a fair point though that this should be explained in the README. I will fi want := &IssueResult{ Overview: wantOverview, + LastComment: 169157303, TotalComments: 1, } @@ -108,16 +109,20 @@ func TestIssueUpdate(t *testing.T) { sdb := secret.Empty() gh := github.New(lg, db, sdb, nil) lc := llmapp.New(lg, llm.EchoContentGenerator(), db) - c := New(lc, gh) + c := New(lg, db, gh, lc, "test-name", "test-bot") proj := "hello/world" iss := &github.Issue{Number: 1} comment := &github.IssueComment{} comment2 := &github.IssueComment{} + comment3 := &github.IssueComment{ + User: github.User{Login: "test-bot"}, + } gh.Testing().AddIssue(proj, iss) gh.Testing().AddIssueComment(proj, iss.Number, comment) gh.Testing().AddIssueComment(proj, iss.Number, comment2) + gh.Testing().AddIssueComment(proj, iss.Number, comment3) got, err := c.ForIssueUpdate(ctx, iss, comment.CommentID()) if err != nil { @@ -149,9 +154,11 @@ func TestIssueUpdate(t *testing.T) { } want := &IssueUpdateResult{ - NewComments: 1, - TotalComments: 2, - Overview: wantOverview, + NewComments: 1, + TotalComments: 3, + SkippedComments: 1, + LastComment: comment3.CommentID(), + Overview: wantOverview, } if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(llmapp.Result{}, "Cached")); diff != "" { diff --git a/internal/overview/poster.go b/internal/overview/poster.go new file mode 100644 index 0000000..9d9169e --- /dev/null +++ b/internal/overview/poster.go @@ -0,0 +1,26 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package overview + +import "errors" + +// TODO(tatianabradley): Implement. +type poster struct { + name string // the name of the poster (for internal state) + bot string // the name of the GitHub bot that will post + minComments int + requireApproval bool + projects map[string]bool +} + +// TODO(tatianabradley): Implement. +func newPoster(name, bot string) *poster { + return &poster{name: name, bot: bot} +} + +// TODO(tatianabradley): Implement. +func (*poster) run() error { + return errors.New("not implemented") +}