aboutsummaryrefslogtreecommitdiff
blob: 72e2ba11e8aa7556b6d265a39495d5034d682a77 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2009 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module contains the RepositoryMirror class and supporting classes.

RepositoryMirror represents the skeleton of a versioned file tree with
multiple lines of development ('LODs').  It records the presence or
absence of files and directories, but not their contents.  Given three
values (revnum, lod, cvs_path), it can tell you whether the specified
CVSPath existed on the specified LOD in the given revision number.
The file trees corresponding to the most recent revision can be
modified.

The individual file trees are stored using immutable tree structures.
Each directory node is represented as a MirrorDirectory instance,
which is basically a map {cvs_path : node_id}, where cvs_path is a
CVSPath within the directory, and node_id is an integer ID that
uniquely identifies another directory node if that node is a
CVSDirectory, or None if that node is a CVSFile.  If a directory node
is to be modified, then first a new node is created with a copy of the
original node's contents, then the copy is modified.  A reference to
the copy also has to be stored in the parent node, meaning that the
parent node needs to be modified, and so on recursively to the root
node of the file tree.  This data structure allows cheap deep copies,
which is useful for tagging and branching.

The class must also be able to find the root directory node
corresponding to a particular (revnum, lod).  This is done by keeping
an LODHistory instance for each LOD, which can determine the root
directory node ID for that LOD for any revnum.  It does so by
recording changes to the root directory node ID only for revisions in
which it changed.  Thus it stores two arrays, revnums (a list of the
revision numbers when the ID changed), and ids (a list of the
corresponding IDs).  To find the ID for a particular revnum, first a
binary search is done in the revnums array to find the index of the
last change preceding revnum, then the corresponding ID is read from
the ids array.  Since most revisions change only one LOD, this allows
storage of the history of potentially tens of thousands of LODs over
hundreds of thousands of revisions in an amount of space that scales
as O(numberOfLODs + numberOfRevisions), rather than O(numberOfLODs *
numberOfRevisions) as would be needed if the information were stored
in the equivalent of a 2D array.

The internal operation of these classes is somewhat intricate, but the
interface attempts to hide the complexity, enforce the usage rules,
and allow efficient access.  The most important facts to remember are
(1) that a directory node can be used for multiple purposes (for
multiple branches and for multiple revisions on a single branch), (2)
that only a node that has been created within the current revision is
allowed to be mutated, and (3) that the current revision can include
nodes carried over from prior revisions, which are immutable.

This leads to a bewildering variety of MirrorDirectory classes.  The
most important distinction is between OldMirrorDirectories and
CurrentMirrorDirectories.  A single node can be represented multiple
ways in memory at the same time, depending on whether it was looked up
as part of the current revision or part of an old revision:

    MirrorDirectory -- the base class for all MirrorDirectory nodes.
        This class allows lookup of subnodes and iteration over
        subnodes.

    OldMirrorDirectory -- a MirrorDirectory that was looked up for an
        old revision.  These instances are immutable, as only the
        current revision is allowed to be modified.

    CurrentMirrorDirectory -- a MirrorDirectory that was looked up for
        the current revision.  Such an instance is always logically
        mutable, though mutating it might require the node to be
        copied first.  Such an instance might represent a node that
        has already been copied during this revision and can therefore
        be modified freely (such nodes implement
        _WritableMirrorDirectoryMixin), or it might represent a node
        that was carried over from an old revision and hasn't been
        copied yet (such nodes implement
        _ReadOnlyMirrorDirectoryMixin).  If the latter, then the node
        copies itself (and bubbles up the change) before allowing
        itself to be modified.  But the distinction is managed
        internally; client classes should not have to worry about it.

    CurrentMirrorLODDirectory -- A CurrentMirrorDirectory representing
        the root directory of a line of development in the current
        revision.  This class has two concrete subclasses,
        _CurrentMirrorReadOnlyLODDirectory and
        _CurrentMirrorWritableLODDirectory, depending on whether the
        node has already been copied during this revision.


    CurrentMirrorSubdirectory -- A CurrentMirrorDirectory representing
        a subdirectory within a line of development's directory tree
        in the current revision.  This class has two concrete
        subclasses, _CurrentMirrorReadOnlySubdirectory and
        _CurrentMirrorWritableSubdirectory, depending on whether the
        node has already been copied during this revision.

    DeletedCurrentMirrorDirectory -- a MirrorDirectory that has been
        deleted.  Such an instance is disabled so that it cannot
        accidentally be used.

While a revision is being processed, RepositoryMirror._new_nodes holds
every writable CurrentMirrorDirectory instance (i.e., every node that
has been created in the revision).  Since these nodes are mutable, it
is important that there be exactly one instance associated with each
node; otherwise there would be problems keeping the instances
synchronized.  These are written to the database by
RepositoryMirror.end_commit().

OldMirrorDirectory and read-only CurrentMirrorDirectory instances are
*not* cached; they are recreated whenever they are referenced.  There
might be multiple instances referring to the same node.  A read-only
CurrentMirrorDirectory instance is mutated in place into a writable
CurrentMirrorDirectory instance if it needs to be modified.

FIXME: The rules for when a MirrorDirectory instance can continue to
be used vs. when it has to be read again (because it has been modified
indirectly and therefore copied) are confusing and error-prone.
Probably the semantics should be changed.

"""


import bisect

from cvs2svn_lib import config
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.log import Log
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.cvs_file import CVSFile
from cvs2svn_lib.cvs_file import CVSDirectory
from cvs2svn_lib.key_generator import KeyGenerator
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.serializer import MarshalSerializer
from cvs2svn_lib.database import IndexedDatabase


class RepositoryMirrorError(Exception):
  """An error related to the RepositoryMirror."""

  pass


class LODExistsError(RepositoryMirrorError):
  """The LOD already exists in the repository.

  Exception raised if an attempt is made to add an LOD to the
  repository mirror and that LOD already exists in the youngest
  revision of the repository."""

  pass


class PathExistsError(RepositoryMirrorError):
  """The path already exists in the repository.

  Exception raised if an attempt is made to add a path to the
  repository mirror and that path already exists in the youngest
  revision of the repository."""

  pass


class DeletedNodeReusedError(RepositoryMirrorError):
  """The MirrorDirectory has already been deleted and shouldn't be reused."""

  pass


class CopyFromCurrentNodeError(RepositoryMirrorError):
  """A CurrentMirrorDirectory cannot be copied to the current revision."""

  pass


class MirrorDirectory(object):
  """Represent a node within the RepositoryMirror.

  Instances of this class act like a map {CVSPath : MirrorDirectory},
  where CVSPath is an item within this directory (i.e., a file or
  subdirectory within this directory).  The value is either another
  MirrorDirectory instance (for directories) or None (for files)."""

  def __init__(self, repo, id, entries):
    # The RepositoryMirror containing this directory:
    self.repo = repo

    # The id of this node:
    self.id = id

    # The entries within this directory, stored as a map {CVSPath :
    # node_id}.  The node_ids are integers for CVSDirectories, None
    # for CVSFiles:
    self._entries = entries

  def __getitem__(self, cvs_path):
    """Return the MirrorDirectory associated with the specified subnode.

    Return a MirrorDirectory instance if the subnode is a
    CVSDirectory; None if it is a CVSFile.  Raise KeyError if the
    specified subnode does not exist."""

    raise NotImplementedError()

  def __len__(self):
    """Return the number of CVSPaths within this node."""

    return len(self._entries)

  def __contains__(self, cvs_path):
    """Return True iff CVS_PATH is contained in this node."""

    return cvs_path in self._entries

  def __iter__(self):
    """Iterate over the CVSPaths within this node."""

    return self._entries.__iter__()

  def _format_entries(self):
    """Format the entries map for output in subclasses' __repr__() methods."""

    def format_item(key, value):
      if value is None:
        return str(key)
      else:
        return '%s -> %x' % (key, value,)

    items = self._entries.items()
    items.sort()
    return '{%s}' % (', '.join([format_item(*item) for item in items]),)

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s<%x>' % (self.__class__.__name__, self.id,)


class OldMirrorDirectory(MirrorDirectory):
  """Represent a historical directory within the RepositoryMirror."""

  def __getitem__(self, cvs_path):
    id = self._entries[cvs_path]
    if id is None:
      # This represents a leaf node.
      return None
    else:
      return OldMirrorDirectory(self.repo, id, self.repo._node_db[id])

  def __repr__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s(%s)' % (self, self._format_entries(),)


class CurrentMirrorDirectory(MirrorDirectory):
  """Represent a directory that currently exists in the RepositoryMirror."""

  def __init__(self, repo, id, lod, cvs_path, entries):
    MirrorDirectory.__init__(self, repo, id, entries)
    self.lod = lod
    self.cvs_path = cvs_path

  def __getitem__(self, cvs_path):
    id = self._entries[cvs_path]
    if id is None:
      # This represents a leaf node.
      return None
    else:
      try:
        return self.repo._new_nodes[id]
      except KeyError:
        return _CurrentMirrorReadOnlySubdirectory(
            self.repo, id, self.lod, cvs_path, self,
            self.repo._node_db[id]
            )

  def __setitem__(self, cvs_path, node):
    """Create or overwrite a subnode of this node.

    CVS_PATH is the path of the subnode.  NODE will be the new value
    of the node; for CVSDirectories it should be a MirrorDirectory
    instance; for CVSFiles it should be None."""

    if isinstance(node, DeletedCurrentMirrorDirectory):
      raise DeletedNodeReusedError(
          '%r has already been deleted and should not be reused' % (node,)
          )
    elif isinstance(node, CurrentMirrorDirectory):
      raise CopyFromCurrentNodeError(
          '%r was created in the current node and cannot be copied' % (node,)
          )
    else:
      self._set_entry(cvs_path, node)

  def __delitem__(self, cvs_path):
    """Remove the subnode of this node at CVS_PATH.

    If the node does not exist, then raise a KeyError."""

    node = self[cvs_path]
    self._del_entry(cvs_path)
    if isinstance(node, _WritableMirrorDirectoryMixin):
      node._mark_deleted()

  def mkdir(self, cvs_directory):
    """Create an empty subdirectory of this node at CVS_PATH.

    Return the CurrentDirectory that was created."""

    assert isinstance(cvs_directory, CVSDirectory)
    if cvs_directory in self:
      raise PathExistsError(
          'Attempt to create directory \'%s\' in %s in repository mirror '
          'when it already exists.'
          % (cvs_directory, self.lod,)
          )

    new_node = _CurrentMirrorWritableSubdirectory(
        self.repo, self.repo._key_generator.gen_id(), self.lod, cvs_directory,
        self, {}
        )
    self._set_entry(cvs_directory, new_node)
    self.repo._new_nodes[new_node.id] = new_node
    return new_node

  def add_file(self, cvs_file):
    """Create a file within this node at CVS_FILE."""

    assert isinstance(cvs_file, CVSFile)
    if cvs_file in self:
      raise PathExistsError(
          'Attempt to create file \'%s\' in %s in repository mirror '
          'when it already exists.'
          % (cvs_file, self.lod,)
          )

    self._set_entry(cvs_file, None)

  def __repr__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s(%r, %r, %s)' % (
        self, self.lod, self.cvs_path, self._format_entries(),
        )


class DeletedCurrentMirrorDirectory(object):
  """A MirrorDirectory that has been deleted.

  A MirrorDirectory that used to be a _WritableMirrorDirectoryMixin
  but then was deleted.  Such instances are turned into this class so
  that nobody can accidentally mutate them again."""

  pass


class _WritableMirrorDirectoryMixin:
  """Mixin for MirrorDirectories that are already writable.

  A MirrorDirectory is writable if it has already been recreated
  during the current revision."""

  def _set_entry(self, cvs_path, node):
    """Create or overwrite a subnode of this node, with no checks."""

    if node is None:
      self._entries[cvs_path] = None
    else:
      self._entries[cvs_path] = node.id

  def _del_entry(self, cvs_path):
    """Remove the subnode of this node at CVS_PATH, with no checks."""

    del self._entries[cvs_path]

  def _mark_deleted(self):
    """Mark this object and any writable descendants as being deleted."""

    self.__class__ = DeletedCurrentMirrorDirectory

    for (cvs_path, id) in self._entries.iteritems():
      if id in self.repo._new_nodes:
        node = self[cvs_path]
        if isinstance(node, _WritableMirrorDirectoryMixin):
          # Mark deleted and recurse:
          node._mark_deleted()


class _ReadOnlyMirrorDirectoryMixin:
  """Mixin for a CurrentMirrorDirectory that hasn't yet been made writable."""

  def _make_writable(self):
    raise NotImplementedError()

  def _set_entry(self, cvs_path, node):
    """Create or overwrite a subnode of this node, with no checks."""

    self._make_writable()
    self._set_entry(cvs_path, node)

  def _del_entry(self, cvs_path):
    """Remove the subnode of this node at CVS_PATH, with no checks."""

    self._make_writable()
    self._del_entry(cvs_path)


class CurrentMirrorLODDirectory(CurrentMirrorDirectory):
  """Represent an LOD's main directory in the mirror's current version."""

  def __init__(self, repo, id, lod, entries):
    CurrentMirrorDirectory.__init__(
        self, repo, id, lod, lod.project.get_root_cvs_directory(), entries
        )

  def delete(self):
    """Remove the directory represented by this object."""

    lod_history = self.repo._get_lod_history(self.lod)
    assert lod_history.exists()
    lod_history.update(self.repo._youngest, None)
    self._mark_deleted()


class _CurrentMirrorReadOnlyLODDirectory(
          CurrentMirrorLODDirectory, _ReadOnlyMirrorDirectoryMixin
          ):
  """Represent an LOD's main directory in the mirror's current version."""

  def _make_writable(self):
    self.__class__ = _CurrentMirrorWritableLODDirectory
    # Create a new ID:
    self.id = self.repo._key_generator.gen_id()
    self.repo._new_nodes[self.id] = self
    self.repo._get_lod_history(self.lod).update(self.repo._youngest, self.id)
    self._entries = self._entries.copy()


class _CurrentMirrorWritableLODDirectory(
          CurrentMirrorLODDirectory, _WritableMirrorDirectoryMixin
          ):
  pass


class CurrentMirrorSubdirectory(CurrentMirrorDirectory):
  """Represent a subdirectory in the mirror's current version."""

  def __init__(self, repo, id, lod, cvs_path, parent_mirror_dir, entries):
    CurrentMirrorDirectory.__init__(self, repo, id, lod, cvs_path, entries)
    self.parent_mirror_dir = parent_mirror_dir

  def delete(self):
    """Remove the directory represented by this object."""

    del self.parent_mirror_dir[self.cvs_path]


class _CurrentMirrorReadOnlySubdirectory(
          CurrentMirrorSubdirectory, _ReadOnlyMirrorDirectoryMixin
          ):
  """Represent a subdirectory in the mirror's current version."""

  def _make_writable(self):
    self.__class__ = _CurrentMirrorWritableSubdirectory
    # Create a new ID:
    self.id = self.repo._key_generator.gen_id()
    self.repo._new_nodes[self.id] = self
    self.parent_mirror_dir._set_entry(self.cvs_path, self)
    self._entries = self._entries.copy()


class _CurrentMirrorWritableSubdirectory(
          CurrentMirrorSubdirectory, _WritableMirrorDirectoryMixin
          ):
  pass


class LODHistory(object):
  """The history of root nodes for a line of development.

  Members:

    _mirror -- (RepositoryMirror) the RepositoryMirror that manages
        this LODHistory.

    lod -- (LineOfDevelopment) the LOD described by this LODHistory.

    revnums -- (list of int) the revision numbers in which the id
        changed, in numerical order.

    ids -- (list of (int or None)) the ID of the node describing the
        root of this LOD starting at the corresponding revision
        number, or None if the LOD did not exist in that revision.

  To find the root id for a given revision number, a binary search is
  done within REVNUMS to find the index of the most recent revision at
  the time of REVNUM, then that index is used to read the id out of
  IDS.

  A sentry is written at the zeroth index of both arrays to describe
  the initial situation, namely, that the LOD doesn't exist in
  revision r0."""

  __slots__ = ['_mirror', 'lod', 'revnums', 'ids']

  def __init__(self, mirror, lod):
    self._mirror = mirror
    self.lod = lod
    self.revnums = [0]
    self.ids = [None]

  def get_id(self, revnum):
    """Get the ID of the root path for this LOD in REVNUM.

    Raise KeyError if this LOD didn't exist in REVNUM."""

    index = bisect.bisect_right(self.revnums, revnum) - 1
    id = self.ids[index]

    if id is None:
      raise KeyError()

    return id

  def get_current_id(self):
    """Get the ID of the root path for this LOD in the current revision.

    Raise KeyError if this LOD doesn't currently exist."""

    id = self.ids[-1]

    if id is None:
      raise KeyError()

    return id

  def exists(self):
    """Return True iff LOD exists in the current revision."""

    return self.ids[-1] is not None

  def update(self, revnum, id):
    """Indicate that the root node of this LOD changed to ID at REVNUM.

    REVNUM is a revision number that must be the same as that of the
    previous recorded change (in which case the previous change is
    overwritten) or later (in which the new change is appended).

    ID can be a node ID, or it can be None to indicate that this LOD
    ceased to exist in REVNUM."""

    if revnum < self.revnums[-1]:
      raise KeyError()
    elif revnum == self.revnums[-1]:
      # This is an attempt to overwrite an entry that was already
      # updated during this revision.  Don't allow the replacement
      # None -> None or allow one new id to be replaced with another:
      old_id = self.ids[-1]
      if old_id is None and id is None:
        raise InternalError(
            'ID changed from None -> None for %s, r%d' % (self.lod, revnum,)
            )
      elif (old_id is not None and id is not None
            and old_id in self._mirror._new_nodes):
        raise InternalError(
            'ID changed from %x -> %x for %s, r%d'
            % (old_id, id, self.lod, revnum,)
            )
      self.ids[-1] = id
    else:
      self.revnums.append(revnum)
      self.ids.append(id)


class _NodeDatabase(object):
  """A database storing all of the directory nodes.

  The nodes are written in groups every time write_new_nodes() is
  called.  To the database is written a dictionary {node_id :
  [(cvs_path.id, node_id),...]}, where the keys are the node_ids of
  the new nodes.  When a node is read, its whole group is read and
  cached under the assumption that the other nodes in the group are
  likely to be needed soon.  The cache is retained across revisions
  and cleared when _cache_max_size is exceeded.

  The dictionaries for nodes that have been read from the database
  during the current revision are cached by node_id in the _cache
  member variable.  The corresponding dictionaries are *not* copied
  when read.  To avoid cross-talk between distinct MirrorDirectory
  instances that have the same node_id, users of these dictionaries
  have to copy them before modification."""

  # How many entries should be allowed in the cache for each
  # CVSDirectory in the repository.  (This number is very roughly the
  # number of complete lines of development that can be stored in the
  # cache at one time.)
  CACHE_SIZE_MULTIPLIER = 5

  # But the cache will never be limited to less than this number:
  MIN_CACHE_LIMIT = 5000

  def __init__(self):
    self.cvs_file_db = Ctx()._cvs_file_db
    self.db = IndexedDatabase(
        artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
        artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
        DB_OPEN_NEW, serializer=MarshalSerializer(),
        )

    # A list of the maximum node_id stored by each call to
    # write_new_nodes():
    self._max_node_ids = [0]

    # A map {node_id : {cvs_path : node_id}}:
    self._cache = {}

    # The number of directories in the repository:
    num_dirs = len([
        cvs_path
        for cvs_path in self.cvs_file_db.itervalues()
        if isinstance(cvs_path, CVSDirectory)
        ])

    self._cache_max_size = max(
        int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
        self.MIN_CACHE_LIMIT,
        )

  def _load(self, items):
    retval = {}
    for (id, value) in items:
      retval[self.cvs_file_db.get_file(id)] = value
    return retval

  def _dump(self, node):
    return [
        (cvs_path.id, value)
        for (cvs_path, value) in node.iteritems()
        ]

  def _determine_index(self, id):
    """Return the index of the record holding the node with ID."""

    return bisect.bisect_left(self._max_node_ids, id)

  def __getitem__(self, id):
    try:
      items = self._cache[id]
    except KeyError:
      index = self._determine_index(id)
      for (node_id, items) in self.db[index].items():
        self._cache[node_id] = self._load(items)
      items = self._cache[id]

    return items

  def write_new_nodes(self, nodes):
    """Write NODES to the database.

    NODES is an iterable of writable CurrentMirrorDirectory instances."""

    if len(self._cache) > self._cache_max_size:
      # The size of the cache has exceeded the threshold.  Discard the
      # old cache values (but still store the new nodes into the
      # cache):
      Log().debug('Clearing node cache')
      self._cache.clear()

    data = {}
    max_node_id = 0
    for node in nodes:
      max_node_id = max(max_node_id, node.id)
      data[node.id] = self._dump(node._entries)
      self._cache[node.id] = node._entries

    self.db[len(self._max_node_ids)] = data

    if max_node_id == 0:
      # Rewrite last value:
      self._max_node_ids.append(self._max_node_ids[-1])
    else:
      self._max_node_ids.append(max_node_id)

  def close(self):
    self._cache.clear()
    self.db.close()
    self.db = None


class RepositoryMirror:
  """Mirror a repository and its history.

  Mirror a repository as it is constructed, one revision at a time.
  For each LineOfDevelopment we store a skeleton of the directory
  structure within that LOD for each revnum in which it changed.

  For each LOD that has been seen so far, an LODHistory instance is
  stored in self._lod_histories.  An LODHistory keeps track of each
  revnum in which files were added to or deleted from that LOD, as
  well as the node id of the root of the node tree describing the LOD
  contents at that revision.

  The LOD trees themselves are stored in the _node_db database, which
  maps node ids to nodes.  A node is a map from CVSPath to ids of the
  corresponding subnodes.  The _node_db is stored on disk and each
  access is expensive.

  The _node_db database only holds the nodes for old revisions.  The
  revision that is being constructed is kept in memory in the
  _new_nodes map, which is cheap to access.

  You must invoke start_commit() before each commit and end_commit()
  afterwards."""

  def register_artifacts(self, which_pass):
    """Register the artifacts that will be needed for this object."""

    artifact_manager.register_temp_file(
        config.MIRROR_NODES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(
        config.MIRROR_NODES_STORE, which_pass
        )

  def open(self):
    """Set up the RepositoryMirror and prepare it for commits."""

    self._key_generator = KeyGenerator()

    # A map from LOD to LODHistory instance for all LODs that have
    # been referenced so far:
    self._lod_histories = {}

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track file existence, not file contents.)
    self._node_db = _NodeDatabase()

    # Start at revision 0 without a root node.
    self._youngest = 0

  def start_commit(self, revnum):
    """Start a new commit."""

    assert revnum > self._youngest
    self._youngest = revnum

    # A map {node_id : _WritableMirrorDirectoryMixin}.
    self._new_nodes = {}

  def end_commit(self):
    """Called at the end of each commit.

    This method copies the newly created nodes to the on-disk nodes
    db."""

    # Copy the new nodes to the _node_db
    self._node_db.write_new_nodes([
        node
        for node in self._new_nodes.values()
        if not isinstance(node, DeletedCurrentMirrorDirectory)
        ])

    del self._new_nodes

  def _get_lod_history(self, lod):
    """Return the LODHistory instance describing LOD.

    Create a new (empty) LODHistory if it doesn't yet exist."""

    try:
      return self._lod_histories[lod]
    except KeyError:
      lod_history = LODHistory(self, lod)
      self._lod_histories[lod] = lod_history
      return lod_history

  def get_old_lod_directory(self, lod, revnum):
    """Return the directory for the root path of LOD at revision REVNUM.

    Return an instance of MirrorDirectory if the path exists;
    otherwise, raise KeyError."""

    lod_history = self._get_lod_history(lod)
    id = lod_history.get_id(revnum)
    return OldMirrorDirectory(self, id, self._node_db[id])

  def get_old_path(self, cvs_path, lod, revnum):
    """Return the node for CVS_PATH from LOD at REVNUM.

    If CVS_PATH is a CVSDirectory, then return an instance of
    OldMirrorDirectory.  If CVS_PATH is a CVSFile, return None.

    If CVS_PATH does not exist in the specified LOD and REVNUM, raise
    KeyError."""

    node = self.get_old_lod_directory(lod, revnum)

    for sub_path in cvs_path.get_ancestry()[1:]:
      node = node[sub_path]

    return node

  def get_current_lod_directory(self, lod):
    """Return the directory for the root path of LOD in the current revision.

    Return an instance of CurrentMirrorDirectory.  Raise KeyError if
    the path doesn't already exist."""

    lod_history = self._get_lod_history(lod)
    id = lod_history.get_current_id()
    try:
      return self._new_nodes[id]
    except KeyError:
      return _CurrentMirrorReadOnlyLODDirectory(
          self, id, lod, self._node_db[id]
          )

  def get_current_path(self, cvs_path, lod):
    """Return the node for CVS_PATH from LOD in the current revision.

    If CVS_PATH is a CVSDirectory, then return an instance of
    CurrentMirrorDirectory.  If CVS_PATH is a CVSFile, return None.

    If CVS_PATH does not exist in the current revision of the
    specified LOD, raise KeyError."""

    node = self.get_current_lod_directory(lod)

    for sub_path in cvs_path.get_ancestry()[1:]:
      node = node[sub_path]

    return node

  def add_lod(self, lod):
    """Create a new LOD in this repository.

    Return the CurrentMirrorDirectory that was created.  If the LOD
    already exists, raise LODExistsError."""

    lod_history = self._get_lod_history(lod)
    if lod_history.exists():
      raise LODExistsError(
          'Attempt to create %s in repository mirror when it already exists.'
          % (lod,)
          )
    new_node = _CurrentMirrorWritableLODDirectory(
        self, self._key_generator.gen_id(), lod, {}
        )
    lod_history.update(self._youngest, new_node.id)
    self._new_nodes[new_node.id] = new_node
    return new_node

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD.

    In the youngest revision of the repository, the destination LOD
    *must not* already exist.

    Return the new node at DEST_LOD, as a CurrentMirrorDirectory."""

    # Get the node of our src_path
    src_node = self.get_old_lod_directory(src_lod, src_revnum)

    dest_lod_history = self._get_lod_history(dest_lod)
    if dest_lod_history.exists():
      raise LODExistsError(
          'Attempt to copy to %s in repository mirror when it already exists.'
          % (dest_lod,)
          )

    dest_lod_history.update(self._youngest, src_node.id)

    # Return src_node, except packaged up as a CurrentMirrorDirectory:
    return self.get_current_lod_directory(dest_lod)

  def close(self):
    """Free resources and close databases."""

    self._lod_histories = None
    self._node_db.close()
    self._node_db = None