#!/bin/bash
#
# Packs a Mercurial forest repository into a compact workspace tarball.
#
# Usage:
#   ./workspace-pack-hg-8.sh <repository-url> <workspace-name>
#
# Example:
#   ./workspace-pack-hg-8.sh https://hg.openjdk.java.net/jdk8u/jdk8u jdk8u-jdk8u

# Abort on failing commands, unset variables and failing pipeline stages.
set -euo pipefail

# Fail early with a readable message instead of an "unbound variable" error.
# Extra arguments are tolerated, as they always have been.
if (( $# < 2 )); then
  printf 'Usage: %s <repository-url> <workspace-name>\n' "${0##*/}" >&2
  exit 2
fi

# Enable aggressive repository format for Mercurial. Cloning the repository into
# this format would take a while. This is mostly convenient for the CI jobs that
# repackage the workspaces, but not for user clones.
# Kept as a plain string because it is also handed to hgforest.sh through
# HGFOREST_GLOBALOPTS in exactly this form.
HG_OPTS=" --config=format.generaldelta=1 --config=format.aggressivemergedeltas=1"

URL=$1
NAME=$2

# NAME is later used in "rm -rf" and "find" invocations, so an empty value must
# never get that far. An empty URL cannot be cloned either.
: "${URL:?repository URL must not be empty}"
: "${NAME:?workspace name must not be empty}"

UPSTREAM_NAME="${NAME}-upstream"

# Clone the repository, if it does not exist yet. The particular trouble with
# forest repository is that we need to update the root repository to get the
# forest management script, which can then be used to clone the subrepos.
if [[ ! -d "$UPSTREAM_NAME" ]]; then
  hg clone "$URL" "$UPSTREAM_NAME"
  (
    # Subshell: the directory change cannot leak out, whatever NAME looks like.
    cd "$UPSTREAM_NAME"
    hg update
    HGFOREST_GLOBALOPTS="${HG_OPTS}" sh common/bin/hgforest.sh clone
  )
fi

# Perform the very first pull of the repository. This would already merge some
# metadata using general+aggressive deltas. Make sure repository does not have
# working copy checked out, only the metadata itself. Again, we need to get root
# repository in order before dealing with forests.
(
  cd "$UPSTREAM_NAME"
  hg pull
  hg update
  HGFOREST_GLOBALOPTS="${HG_OPTS}" sh common/bin/hgforest.sh pull
  sh common/bin/hgforest.sh update null
  hg update null
)

# Perform the *second* clone of the repository. This apparently repackages the
# manifest and metadata to a much more compact form. Make sure working copy is
# not materialized, and that we use the non-local clone that actually rewires
# metadata. This clone would record the "local" repo as parent, overwrite it
# with the upstream repo URL.
# NAME feeds "rm -rf" and "find" below: refuse to continue if it is unset or
# empty, so that neither command can ever be pointed at the wrong place.
: "${NAME:?workspace name must be set}"

# HG_OPTS is a whitespace-separated option string. Split it into an array once,
# so that every expansion below can be quoted.
read -r -a hg_opts <<< "$HG_OPTS"

# Subrepositories that may be present in the forest.
subrepos=(corba jaxp jaxws jdk hotspot langtools nashorn)

rm -rf -- "$NAME"
hg "${hg_opts[@]}" clone --pull --uncompressed -U "$UPSTREAM_NAME" "$NAME"
printf '[paths]\ndefault = %s\n' "$URL" > "$NAME/.hg/hgrc"

# Clone the subrepositories in parallel, remembering every job's PID.
clone_pids=()
for repo in "${subrepos[@]}"; do
  if [[ -d "$UPSTREAM_NAME/$repo" ]]; then
    hg "${hg_opts[@]}" clone --pull --uncompressed -U \
      "$UPSTREAM_NAME/$repo" "$NAME/$repo" &
    clone_pids+=("$!")
  fi
done

# A bare "wait" always reports success, and "set -e" does not cover background
# jobs. Wait for each PID explicitly so that a failed clone aborts the run
# instead of silently producing an incomplete workspace.
clone_failed=0
if (( ${#clone_pids[@]} > 0 )); then
  for pid in "${clone_pids[@]}"; do
    wait "$pid" || clone_failed=1
  done
fi
if (( clone_failed )); then
  printf 'error: one or more subrepository clones failed\n' >&2
  exit 1
fi

# Point every cloned subrepository at its upstream URL rather than at the
# local clone source.
for repo in "${subrepos[@]}"; do
  if [[ -d "$NAME/$repo" ]]; then
    printf '[paths]\ndefault = %s\n' "$URL/$repo" > "$NAME/$repo/.hg/hgrc"
  fi
done

# Package the repository. In the presence of bulk file moves, the stream gets
# much more compressible if we stack similar files together. The default order
# comes from the filesystem walk, and might not be ideal for this. The good proxy
# for similarity is the file name: it captures lots of moves of the same file,
# and clusters similar files together. Create the sorted list of paths,
# and use that order to create the tarball.
#
# Each line is "<key> <path>", where the key is the file name with *all* spaces
# removed. That way the first space on the line is always the separator, and
# "cut" recovers the complete path even when it contains spaces itself.
find "$NAME/" -type f \
  | awk -F '/' '{ k = $(NF); gsub(/ /, "", k); print k " " $0; }' \
  | sort \
  | cut -d" " -f 2- > list.txt

tar -c -f "$NAME.tar" -T list.txt

# Finally, compress the whole thing with the max compression available. This
# probably clashes with Mercurial's own storage compression, and it would be
# interesting to somehow request the metadata compressor to unpack the repository
# internal metadata for this compressor to work on raw stream. That is what
# "hg bundle" apparently does? We are working with the repository copy itself,
# so we have to be content with whatever compression we can get.
rm -f -- "$NAME.tar.xz"
xz -9 "$NAME.tar"

# Verification: unpack the resulting tarball and check repository is still fine.
rm -rf temp
mkdir temp
tar -xJf "$NAME.tar.xz" -C temp
(
  # Subshell: the working directory is restored even for unusual NAME values.
  cd "temp/$NAME"
  hg update
  hg verify
  sh common/bin/hgforest.sh update
  sh common/bin/hgforest.sh verify
)
rm -rf temp

# Sample jdk8u/jdk8u tarball sizes:
#  - 326M uncompressed, 209M compressed originally
#  - 322M uncompressed, 208M compressed after file reordering