From 5add783e89fad10069df63ef4314affb6d002b61 Mon Sep 17 00:00:00 2001 From: Ori Bernstein Date: Sat, 5 Dec 2020 17:51:20 +0000 Subject: [PATCH] git/send: smaller packfiles when pushing When pushing to a remote repository, we would pack at least all objects in one commit, even if the remote repository had most of them. This change prunes out all objects from the last commit that they have, which should drop the size of the generated pack file noticeably for the common case of pushing a commit that changes a small percentage of files in a larger repository. --- git.h | 2 +- pack.c | 61 ++++++++++++++++++++++++++++++++++---------------------- repack.c | 8 ++------ send.c | 8 +++----- serve.c | 8 ++------ 5 files changed, 45 insertions(+), 42 deletions(-) diff --git a/git.h b/git.h index cb6c5e8..533f1bc 100644 --- a/git.h +++ b/git.h @@ -248,7 +248,7 @@ Object *readobject(Hash); Object *clearedobject(Hash, int); void parseobject(Object *); int indexpack(char *, char *, Hash); -int writepack(int, Object **, int, Hash*); +int writepack(int, Hash*, int, Hash*, int, Hash*); int hasheq(Hash *, Hash *); Object *ref(Object *); void unref(Object *); diff --git a/pack.c b/pack.c index e760470..4f2186f 100644 --- a/pack.c +++ b/pack.c @@ -1167,10 +1167,15 @@ writeordercmp(void *pa, void *pb) } static void -addmeta(Objmeta ***meta, int *nmeta, Object *o, char *path, vlong mtime) +addmeta(Objmeta ***meta, int *nmeta, Objset *has, Object *o, char *path, vlong mtime) { Objmeta *m; + if(oshas(has, o->hash)) + return; + osadd(has, o); + if(meta == nil) + return; m = emalloc(sizeof(Objmeta)); m->obj = o; m->path = estrdup(path); @@ -1201,8 +1206,7 @@ loadtree(Objmeta ***m, int *nm, Objset *has, Hash tree, char *dpath, vlong mtime return 0; if((t = readobject(tree)) == nil) return -1; - osadd(has, t); - addmeta(m, nm, t, dpath, mtime); + addmeta(m, nm, has, t, dpath, mtime); for(i = 0; i < t->tree->nent; i++){ e = &t->tree->ent[i]; if(oshas(has, e->h)) @@ -1212,10 +1216,9 @@ 
loadtree(Objmeta ***m, int *nm, Objset *has, Hash tree, char *dpath, vlong mtime k = (e->mode & DMDIR) ? GTree : GBlob; o = clearedobject(e->h, k); p = smprint("%s/%s", dpath, e->name); - if(k == GBlob){ - osadd(has, o); - addmeta(m, nm, o, p, mtime); - }else if(loadtree(m, nm, has, e->h, p, mtime) == -1){ + if(k == GBlob) + addmeta(m, nm, has, o, p, mtime); + else if(loadtree(m, nm, has, e->h, p, mtime) == -1){ free(p); return -1; } @@ -1235,31 +1238,39 @@ loadcommit(Objmeta ***m, int *nm, Objset *has, Hash h) return 0; if((c = readobject(h)) == nil) return -1; - osadd(has, c); - addmeta(m, nm, c, "", c->commit->ctime); + addmeta(m, nm, has, c, "", c->commit->ctime); r = loadtree(m, nm, has, c->commit->tree, "", c->commit->ctime); unref(c); return r; } static int -readmeta(Object **commits, int ncommits, Objmeta ***m) +readmeta(Hash *theirs, int ntheirs, Hash *ours, int nours, Objmeta ***m) { + Object **obj; Objset has; - int i, nm; + int i, nm, nobj; *m = nil; nm = 0; osinit(&has); - for(i = 0; i < ncommits; i++){ - dprint(2, "loading commit %H\n", commits[i]->hash); - if(loadcommit(m, &nm, &has, commits[i]->hash) == -1){ - free(*m); - return -1; - } - } + if(findtwixt(theirs, ntheirs, ours, nours, &obj, &nobj) == -1) + sysfatal("load twixt: %r"); + if(nobj == 0) + return 0; + for(i = 0; i < nours; i++) + if(!hasheq(&ours[i], &Zhash)) + if(loadcommit(nil, nil, &has, ours[i]) == -1) + goto out; + for(i = 0; i < nobj; i++) + if(loadcommit(m, &nm, &has, obj[i]->hash) == -1) + goto out; osclear(&has); return nm; +out: + osclear(&has); + free(*m); + return -1; } static int @@ -1281,12 +1292,14 @@ pickdeltas(Objmeta **meta, int nmeta) int i, j, x, nd, sz, pcnt, best; pcnt = 0; - fprint(2, "deltifying %d objects: 0%%", nmeta); + dprint(1, "picking deltas\n"); + if(interactive) + fprint(2, "deltifying %d objects: 0%%", nmeta); qsort(meta, nmeta, sizeof(Objmeta*), deltaordercmp); for(i = 0; i < nmeta; i++){ m = meta[i]; x = (i*100) / nmeta; - if(x > pcnt){ + 
if(interactive && x > pcnt){ pcnt = x; if(pcnt%10 == 0) fprint(2, "\b\b\b\b%3d%%", pcnt); @@ -1561,15 +1574,15 @@ genpack(int fd, Objmeta **meta, int nmeta, Hash *h, int odelta) } int -writepack(int fd, Object **obj, int nobj, Hash *h) +writepack(int fd, Hash *theirs, int ntheirs, Hash *ours, int nours, Hash *h) { Objmeta **meta; int i, r, nmeta; - dprint(1, "reading meta\n"); - if((nmeta = readmeta(obj, nobj, &meta)) == -1) + if((nmeta = readmeta(theirs, ntheirs, ours, nours, &meta)) == -1) return -1; - dprint(1, "picking deltas\n"); + if(nmeta == 0) + return 0; pickdeltas(meta, nmeta); dprint(1, "generating pack\n"); r = genpack(fd, meta, nmeta, h, 0); diff --git a/repack.c b/repack.c index 717a450..601231b 100644 --- a/repack.c +++ b/repack.c @@ -47,9 +47,8 @@ void main(int argc, char **argv) { char path[128], **names; - int fd, nobj, nrefs; + int fd, nrefs; Hash *refs, h; - Object **obj; Dir rn; ARGBEGIN{ @@ -64,13 +63,10 @@ main(int argc, char **argv) refs = nil; if((nrefs = listrefs(&refs, &names)) == -1) sysfatal("load refs: %r"); - if(findtwixt(refs, nrefs, nil, 0, &obj, &nobj) == -1) - sysfatal("load twixt: %r"); if((fd = create(TMPPATH("pack.tmp"), OWRITE, 0644)) == -1) sysfatal("open %s: %r", TMPPATH("pack.tmp")); - if(writepack(fd, obj, nobj, &h) == -1) + if(writepack(fd, refs, nrefs, nil, 0, &h) == -1) sysfatal("writepack: %r"); - free(obj); if(indexpack(TMPPATH("pack.tmp"), TMPPATH("idx.tmp"), h) == -1) sysfatal("indexpack: %r"); close(fd); diff --git a/send.c b/send.c index 79cf300..556e2ee 100644 --- a/send.c +++ b/send.c @@ -99,10 +99,10 @@ parsecaps(char *caps, Capset *cs) int sendpack(Conn *c) { - int i, n, r, idx, nupd, nobj, nsp, send, first; + int i, n, r, idx, nupd, nsp, send, first; char buf[Pktmax], *sp[3]; Hash h, *theirs, *ours; - Object *a, *b, *p, **obj; + Object *a, *b, *p; char **refs; Capset cs; @@ -187,9 +187,7 @@ sendpack(Conn *c) if(!send) print("nothing to send\n"); if(send){ - if(findtwixt(ours, nupd, theirs, nupd, &obj, &nobj) 
== -1) - return -1; - if(writepack(c->wfd, obj, nobj, &h) == -1) + if(writepack(c->wfd, ours, nupd, theirs, nupd, &h) == -1) return -1; if(cs.report && readphase(c) == -1) return -1; diff --git a/serve.c b/serve.c index 6f506cf..d506601 100644 --- a/serve.c +++ b/serve.c @@ -138,17 +138,13 @@ int servpack(Conn *c) { Hash *head, *tail, h; - Object **obj; - int nhead, ntail, nobj; + int nhead, ntail; dprint(1, "negotiating pack\n"); if(servnegotiate(c, &head, &nhead, &tail, &ntail) == -1) sysfatal("negotiate: %r"); - dprint(1, "finding twixt\n"); - if(findtwixt(head, nhead, tail, ntail, &obj, &nobj) == -1) - sysfatal("twixt: %r"); dprint(1, "writing pack\n"); - if(nobj > 0 && writepack(c->wfd, obj, nobj, &h) == -1) + if(writepack(c->wfd, head, nhead, tail, ntail, &h) == -1) sysfatal("send: %r"); return 0; }