about summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'cvs2svn_lib/checkout_internal.py')
-rw-r--r-- cvs2svn_lib/checkout_internal.py 778
1 files changed, 0 insertions, 778 deletions
diff --git a/cvs2svn_lib/checkout_internal.py b/cvs2svn_lib/checkout_internal.py
deleted file mode 100644
index fe28e0c..0000000
--- a/cvs2svn_lib/checkout_internal.py
+++ /dev/null
@@ -1,778 +0,0 @@
-# (Be in -*- python -*- mode.)
-#
-# ====================================================================
-# Copyright (c) 2007-2009 CollabNet. All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://subversion.tigris.org/license-1.html.
-# If newer versions of this license are posted there, you may use a
-# newer version instead, at your option.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For exact contribution history, see the revision
-# history and logs, available at http://cvs2svn.tigris.org/.
-# ====================================================================
-
-"""This module contains classes that implement the --use-internal-co option.
-
-The idea is to patch up the revisions' contents incrementally, thus
-avoiding the huge number of process spawns and the O(n^2) overhead of
-using 'co' and 'cvs'.
-
-InternalRevisionRecorder saves the RCS deltas and RCS revision trees
-to databases. Notably, deltas from the trunk need to be reversed, as
-CVS stores them so they apply from HEAD backwards.
-
-InternalRevisionExcluder copies the revision trees to a new database,
-omitting excluded branches.
-
-InternalRevisionReader produces the revisions' contents on demand. To
-generate the text for a typical revision, we need the revision's delta
-text plus the fulltext of the previous revision. Therefore, we
-maintain a checkout database containing a copy of the fulltext of any
-revision for which subsequent revisions still need to be retrieved.
-It is crucial to remove text from this database as soon as it is no
-longer needed, to prevent it from growing enormous.
-
-There are two reasons that the text from a revision can be needed: (1)
-because the revision itself still needs to be output to a dumpfile;
-(2) because another revision needs it as the base of its delta. We
-maintain a reference count for each revision, which includes *both*
-possibilities. The first time a revision's text is needed, it is
-generated by applying the revision's deltatext to the previous
-revision's fulltext, and the resulting fulltext is stored in the
-checkout database. Each time a revision's fulltext is retrieved, its
-reference count is decremented. When the reference count goes to
-zero, then the fulltext is deleted from the checkout database.
-
-The administrative data for managing this consists of one TextRecord
-entry for each revision. Each TextRecord has an id, which is the same
-id as used for the corresponding CVSRevision instance. It also
-maintains a count of the times it is expected to be retrieved.
-TextRecords come in several varieties:
-
-FullTextRecord -- Used for revisions whose fulltext is contained
- directly in the RCS file, and therefore available during
- CollectRevsPass (i.e., typically revision 1.1 of each file).
-
-DeltaTextRecord -- Used for revisions that are defined via a delta
- relative to some other TextRecord. These records record the id of
- the TextRecord that holds the base text against which the delta is
- defined. When the text for a DeltaTextRecord is retrieved, the
- DeltaTextRecord instance is deleted and a CheckedOutTextRecord
- instance is created to take its place.
-
-CheckedOutTextRecord -- Used during OutputPass for a revision that
- started out as a DeltaTextRecord, but has already been retrieved
- (and therefore its fulltext is stored in the checkout database).
-
-While a file is being processed during CollectRevsPass, the fulltext
-and deltas are stored to the delta database, and TextRecord instances
-are created to keep track of things. The reference counts are all
-initialized to zero.
-
-After CollectRevsPass has done any preliminary tree mangling, its
-_FileDataCollector.parse_completed() method calls
-RevisionRecorder.finish_file(), passing it the CVSFileItems instance
-that describes the revisions in the file. At this point the reference
-counts for the file's TextRecords are updated: each record referred to
-by a delta has its refcount incremented, and each record that
-corresponds to a non-delete CVSRevision is incremented. After that,
-any records with refcount==0 are removed. When one record is removed,
-that can cause another record's reference count to go to zero and be
-removed too, recursively. When a TextRecord is deleted at this stage,
-its deltatext is also deleted from the delta database.
-
-In FilterSymbolsPass, the exact same procedure (described in the
-previous paragraph) is repeated, but this time using the CVSFileItems
-after it has been updated for excluded symbols, symbol
-preferred-parent grafting, etc."""
-
-
-import cStringIO
-import re
-import time
-
-from cvs2svn_lib import config
-from cvs2svn_lib.common import DB_OPEN_NEW
-from cvs2svn_lib.common import DB_OPEN_READ
-from cvs2svn_lib.common import warning_prefix
-from cvs2svn_lib.common import FatalError
-from cvs2svn_lib.common import InternalError
-from cvs2svn_lib.context import Ctx
-from cvs2svn_lib.log import Log
-from cvs2svn_lib.artifact_manager import artifact_manager
-from cvs2svn_lib.symbol import Trunk
-from cvs2svn_lib.cvs_item import CVSRevisionModification
-from cvs2svn_lib.database import Database
-from cvs2svn_lib.database import IndexedDatabase
-from cvs2svn_lib.rcs_stream import RCSStream
-from cvs2svn_lib.rcs_stream import MalformedDeltaException
-from cvs2svn_lib.revision_manager import RevisionRecorder
-from cvs2svn_lib.revision_manager import RevisionExcluder
-from cvs2svn_lib.revision_manager import RevisionReader
-from cvs2svn_lib.serializer import MarshalSerializer
-from cvs2svn_lib.serializer import CompressingSerializer
-from cvs2svn_lib.serializer import PrimedPickleSerializer
-
-
-class TextRecord(object):
- """Bookkeeping data for the text of a single CVSRevision."""
-
- __slots__ = ['id', 'refcount']
-
- def __init__(self, id):
- # The cvs_rev_id of the revision whose text this is.
- self.id = id
-
- # The number of times that the text of this revision will be
- # retrieved.
- self.refcount = 0
-
- def __getstate__(self):
- return (self.id, self.refcount,)
-
- def __setstate__(self, state):
- (self.id, self.refcount,) = state
-
- def increment_dependency_refcounts(self, text_record_db):
- """Increment the refcounts of any records that this one depends on."""
-
- pass
-
- def decrement_refcount(self, text_record_db):
- """Decrement the number of times our text still has to be checked out.
-
- If the reference count goes to zero, discard this record via TEXT_RECORD_DB."""
-
- self.refcount -= 1
- if self.refcount == 0:
- text_record_db.discard(self.id)
-
- def checkout(self, text_record_db):
- """Workhorse of the checkout process.
-
- Return the text for this revision, decrement our reference count,
- and update the databases depending on whether there will be future
- checkouts."""
-
- raise NotImplementedError()
-
- def free(self, text_record_db):
- """This instance will never again be checked out; free it.
-
- Also free any associated resources and decrement the refcounts of
- any other TextRecords that this one depends on."""
-
- raise NotImplementedError()
-
-
-class FullTextRecord(TextRecord):
- __slots__ = []
-
- def __getstate__(self):
- return (self.id, self.refcount,)
-
- def __setstate__(self, state):
- (self.id, self.refcount,) = state
-
- def checkout(self, text_record_db):
- text = text_record_db.delta_db[self.id]
- self.decrement_refcount(text_record_db)
- return text
-
- def free(self, text_record_db):
- del text_record_db.delta_db[self.id]
-
- def __str__(self):
- return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
-
-
-class DeltaTextRecord(TextRecord):
- __slots__ = ['pred_id']
-
- def __init__(self, id, pred_id):
- TextRecord.__init__(self, id)
-
- # The cvs_rev_id of the revision relative to which this delta is
- # defined.
- self.pred_id = pred_id
-
- def __getstate__(self):
- return (self.id, self.refcount, self.pred_id,)
-
- def __setstate__(self, state):
- (self.id, self.refcount, self.pred_id,) = state
-
- def increment_dependency_refcounts(self, text_record_db):
- text_record_db[self.pred_id].refcount += 1
-
- def checkout(self, text_record_db):
- base_text = text_record_db[self.pred_id].checkout(text_record_db)
- co = RCSStream(base_text)
- delta_text = text_record_db.delta_db[self.id]
- co.apply_diff(delta_text)
- text = co.get_text()
- del co
- self.refcount -= 1
- if self.refcount == 0:
- # This text will never be needed again; just delete ourselves
- # without ever having stored the fulltext to the checkout
- # database:
- del text_record_db[self.id]
- else:
- # Store a new CheckedOutTextRecord in place of ourselves:
- text_record_db.checkout_db['%x' % self.id] = text
- new_text_record = CheckedOutTextRecord(self.id)
- new_text_record.refcount = self.refcount
- text_record_db.replace(new_text_record)
- return text
-
- def free(self, text_record_db):
- del text_record_db.delta_db[self.id]
- text_record_db[self.pred_id].decrement_refcount(text_record_db)
-
- def __str__(self):
- return 'DeltaTextRecord(%x -> %x, %d)' \
- % (self.pred_id, self.id, self.refcount,)
-
-
-class CheckedOutTextRecord(TextRecord):
- __slots__ = []
-
- def __getstate__(self):
- return (self.id, self.refcount,)
-
- def __setstate__(self, state):
- (self.id, self.refcount,) = state
-
- def checkout(self, text_record_db):
- text = text_record_db.checkout_db['%x' % self.id]
- self.decrement_refcount(text_record_db)
- return text
-
- def free(self, text_record_db):
- del text_record_db.checkout_db['%x' % self.id]
-
- def __str__(self):
- return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
-
-
-class NullDatabase(object):
- """A do-nothing database that can be used with TextRecordDatabase.
-
- Use this when you don't actually want to allow anything to be
- deleted."""
-
- def __delitem__(self, id):
- pass
-
-
-class TextRecordDatabase:
- """Holds the TextRecord instances that are currently live.
-
- During CollectRevsPass and FilterSymbolsPass, files are processed
- one by one and a new TextRecordDatabase instance is used for each
- file. During OutputPass, a single TextRecordDatabase instance is
- used for the duration of OutputPass; individual records are added
- and removed when they are active."""
-
- def __init__(self, delta_db, checkout_db):
- # A map { cvs_rev_id -> TextRecord }.
- self.text_records = {}
-
- # A database-like object using cvs_rev_ids as keys and containing
- # fulltext/deltatext strings as values. Its __getitem__() method
- # is used to retrieve deltas when they are needed, and its
- # __delitem__() method is used to delete deltas when they can be
- # freed. The modifiability of the delta database varies from pass
- # to pass, so the object stored here varies as well:
- #
- # CollectRevsPass: a fully-functional IndexedDatabase. This
- # allows deltas that will not be needed to be deleted.
- #
- # FilterSymbolsPass: a NullDatabase. The delta database cannot be
- # modified during this pass, and we have no need to retrieve
- # deltas, so we just use a dummy object here.
- #
- # OutputPass: a disabled IndexedDatabase. During this pass we
- # need to retrieve deltas, but we are not allowed to modify the
- # delta database. So we use an IndexedDatabase whose __delitem__()
- # method has been disabled to do nothing.
- self.delta_db = delta_db
-
- # A database-like object using cvs_rev_ids (formatted as hexadecimal
- # strings) as keys and containing fulltext strings as values. This
- # database is only set during OutputPass.
- self.checkout_db = checkout_db
-
- # If this is set to a list, then the list holds the ids of
- # text_records that have to be deleted; when discard() is called,
- # it adds the requested id to the list but does not delete it. If
- # this member is set to None, then text_records are deleted
- # immediately when discard() is called.
- self.deferred_deletes = None
-
- def __getstate__(self):
- return (self.text_records.values(),)
-
- def __setstate__(self, state):
- (text_records,) = state
- self.text_records = {}
- for text_record in text_records:
- self.add(text_record)
- self.delta_db = NullDatabase()
- self.checkout_db = NullDatabase()
- self.deferred_deletes = None
-
- def add(self, text_record):
- """Add TEXT_RECORD to our database.
-
- There must not already be a record with the same id."""
-
- assert not self.text_records.has_key(text_record.id)
-
- self.text_records[text_record.id] = text_record
-
- def __getitem__(self, id):
- return self.text_records[id]
-
- def __delitem__(self, id):
- """Remove the record with the specified ID without calling its free()."""
-
- del self.text_records[id]
-
- def replace(self, text_record):
- """Store TEXT_RECORD in place of the existing record with the same id.
-
- Do not do anything with the old record."""
-
- assert self.text_records.has_key(text_record.id)
- self.text_records[text_record.id] = text_record
-
- def discard(self, *ids):
- """The text records with IDS are no longer needed; discard them.
-
- This involves calling their free() methods and also removing them
- from SELF.
-
- If SELF.deferred_deletes is not None, then the ids to be deleted
- are added to the list instead of deleted immediately. This
- mechanism is to prevent a stack overflow from the avalanche of
- deletes that can result from deleting a long chain of revisions."""
-
- if self.deferred_deletes is None:
- # This is an outer-level delete.
- self.deferred_deletes = list(ids)
- while self.deferred_deletes:
- id = self.deferred_deletes.pop()
- text_record = self[id]
- if text_record.refcount != 0:
- raise InternalError(
- 'TextRecordDatabase.discard(%s) called with refcount = %d'
- % (text_record, text_record.refcount,)
- )
- # This call might cause other text_record ids to be added to
- # self.deferred_deletes:
- text_record.free(self)
- del self[id]
- self.deferred_deletes = None
- else:
- self.deferred_deletes.extend(ids)
-
- def itervalues(self):
- return self.text_records.itervalues()
-
- def recompute_refcounts(self, cvs_file_items):
- """Recompute the refcounts of the contained TextRecords.
-
- Use CVS_FILE_ITEMS to determine which records will be needed by
- cvs2svn."""
-
- # First clear all of the refcounts:
- for text_record in self.itervalues():
- text_record.refcount = 0
-
- # Now increment the reference count of records that are needed as
- # the source of another record's deltas:
- for text_record in self.itervalues():
- text_record.increment_dependency_refcounts(self.text_records)
-
- # Now increment the reference count of records that will be needed
- # by cvs2svn:
- for lod_items in cvs_file_items.iter_lods():
- for cvs_rev in lod_items.cvs_revisions:
- if isinstance(cvs_rev, CVSRevisionModification):
- self[cvs_rev.id].refcount += 1
-
- def free_unused(self):
- """Free any TextRecords whose reference counts are zero."""
-
- # The deletion of some of these text records might cause others to
- # be unused, in which case they will be deleted automatically.
- # But since the initially-unused records are not referred to by
- # any others, we don't have to be afraid that they will be deleted
- # before we get to them. But it *is* crucial that we create the
- # whole unused list before starting the loop.
-
- unused = [
- text_record.id
- for text_record in self.itervalues()
- if text_record.refcount == 0
- ]
-
- self.discard(*unused)
-
- def log_leftovers(self):
- """If any TextRecords still exist, log them."""
-
- if self.text_records:
- Log().warn(
- "%s: internal problem: leftover revisions in the checkout cache:"
- % warning_prefix)
- for text_record in self.itervalues():
- Log().warn(' %s' % (text_record,))
-
- def __repr__(self):
- """Debugging output of the current contents of the TextRecordDatabase."""
-
- retval = ['TextRecordDatabase:']
- for text_record in self.itervalues():
- retval.append(' %s' % (text_record,))
- return '\n'.join(retval)
-
-
-class InternalRevisionRecorder(RevisionRecorder):
- """A RevisionRecorder that reconstructs the fulltext internally."""
-
- def __init__(self, compress):
- RevisionRecorder.__init__(self)
- self._compress = compress
-
- def register_artifacts(self, which_pass):
- artifact_manager.register_temp_file(
- config.RCS_DELTAS_INDEX_TABLE, which_pass
- )
- artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
- artifact_manager.register_temp_file(
- config.RCS_TREES_INDEX_TABLE, which_pass
- )
- artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
-
- def start(self):
- ser = MarshalSerializer()
- if self._compress:
- ser = CompressingSerializer(ser)
- self._rcs_deltas = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
- artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
- DB_OPEN_NEW, ser)
- primer = (FullTextRecord, DeltaTextRecord)
- self._rcs_trees = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_TREES_STORE),
- artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
- DB_OPEN_NEW, PrimedPickleSerializer(primer))
-
- def start_file(self, cvs_file_items):
- self._cvs_file_items = cvs_file_items
-
- # A map from cvs_rev_id to TextRecord instance:
- self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())
-
- def record_text(self, cvs_rev, log, text):
- if isinstance(cvs_rev.lod, Trunk):
- # On trunk, revisions are encountered in reverse order (1.<N>
- # ... 1.1) and deltas are inverted. The first text that we see
- # is the fulltext for the HEAD revision. After that, the text
- # corresponding to revision 1.<N> is the delta (1.<N+1> ->
- # 1.<N>). We have to invert the deltas here so that we can
- # read the revisions out in dependency order; that is, for
- # revision 1.1 we want the fulltext, and for revision 1.<N> we
- # want the delta (1.<N-1> -> 1.<N>). This means that we can't
- # compute the delta for a revision until we see its logical
- # parent. When we finally see revision 1.1 (which is recognized
- # because it doesn't have a parent), we can record the diff (1.1
- # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
-
- if cvs_rev.next_id is None:
- # This is HEAD, as fulltext. Initialize the RCSStream so
- # that we can compute deltas backwards in time.
- self._stream = RCSStream(text)
- else:
- # Any other trunk revision is a backward delta. Apply the
- # delta to the RCSStream to mutate it to the contents of this
- # revision, and also to get the reverse delta, which we store
- # as the forward delta of our child revision.
- try:
- text = self._stream.invert_diff(text)
- except MalformedDeltaException, (msg):
- Log().error('Malformed RCS delta in %s, revision %s: %s'
- % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev,
- msg))
- raise RuntimeError
- text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
- self._writeout(text_record, text)
-
- if cvs_rev.prev_id is None:
- # This is revision 1.1. Write its fulltext:
- text_record = FullTextRecord(cvs_rev.id)
- self._writeout(text_record, self._stream.get_text())
-
- # There will be no more trunk revisions delivered, so free the
- # RCSStream.
- del self._stream
-
- else:
- # On branches, revisions are encountered in logical order
- # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
- # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
- # <BRANCH>.<N>). That's what we need, so just store it.
-
- # FIXME: It would be nice to avoid writing out branch deltas
- # when --trunk-only. (They will be deleted when finish_file()
- # is called, but if the delta db is in an IndexedDatabase the
- # deletions won't actually recover any disk space.)
- text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id)
- self._writeout(text_record, text)
-
- return None
-
- def _writeout(self, text_record, text):
- self.text_record_db.add(text_record)
- self._rcs_deltas[text_record.id] = text
-
- def finish_file(self, cvs_file_items):
- """Finish processing of the current file.
-
- Compute the initial text record refcounts, discard any records
- that are unneeded, and store the text records for the file to the
- _rcs_trees database."""
-
- # Delete our copy of the preliminary CVSFileItems:
- del self._cvs_file_items
-
- self.text_record_db.recompute_refcounts(cvs_file_items)
- self.text_record_db.free_unused()
- self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
- del self.text_record_db
-
- def finish(self):
- self._rcs_deltas.close()
- self._rcs_trees.close()
-
-
-class InternalRevisionExcluder(RevisionExcluder):
- """The RevisionExcluder used by InternalRevisionReader."""
-
- def register_artifacts(self, which_pass):
- artifact_manager.register_temp_file_needed(
- config.RCS_TREES_STORE, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.RCS_TREES_INDEX_TABLE, which_pass
- )
- artifact_manager.register_temp_file(
- config.RCS_TREES_FILTERED_STORE, which_pass
- )
- artifact_manager.register_temp_file(
- config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
- )
-
- def start(self):
- self._tree_db = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_TREES_STORE),
- artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
- DB_OPEN_READ)
- primer = (FullTextRecord, DeltaTextRecord)
- self._new_tree_db = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
- artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
- DB_OPEN_NEW, PrimedPickleSerializer(primer))
-
- def process_file(self, cvs_file_items):
- text_record_db = self._tree_db[cvs_file_items.cvs_file.id]
- text_record_db.recompute_refcounts(cvs_file_items)
- text_record_db.free_unused()
- self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db
-
- def finish(self):
- self._tree_db.close()
- self._new_tree_db.close()
-
-
-class _KeywordExpander:
- """A class whose instances provide substitutions for CVS keywords.
-
- This class is used via its __call__() method, which should be called
- with a match object representing a match for a CVS keyword string.
- The method returns the replacement for the matched text.
-
- The __call__() method works by calling the method with the same name
- as that of the CVS keyword (converted to lower case).
-
- Instances of this class can be passed as the REPL argument to
- re.sub()."""
-
- date_fmt_old = "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs
- date_fmt_new = "%Y-%m-%d %H:%M:%S" # CVS 1.12
-
- date_fmt = date_fmt_new
-
- @classmethod
- def use_old_date_format(klass):
- """Class method to ensure exact compatibility with CVS 1.11
- output. Use this if you want to verify your conversion and you're
- using CVS 1.11."""
- klass.date_fmt = klass.date_fmt_old
-
- def __init__(self, cvs_rev):
- self.cvs_rev = cvs_rev
-
- def __call__(self, match):
- return '$%s: %s $' % \
- (match.group(1), getattr(self, match.group(1).lower())(),)
-
- def author(self):
- return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
-
- def date(self):
- return time.strftime(self.date_fmt,
- time.gmtime(self.cvs_rev.timestamp))
-
- def header(self):
- return '%s %s %s %s Exp' % \
- (self.source(), self.cvs_rev.rev, self.date(), self.author())
-
- def id(self):
- return '%s %s %s %s Exp' % \
- (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
-
- def locker(self):
- # Handle kvl like kv, as a converted repo is supposed to have no
- # locks.
- return ''
-
- def log(self):
- # Would need some special handling.
- return 'not supported by cvs2svn'
-
- def name(self):
- # Cannot work, as just creating a new symbol does not check out
- # the revision again.
- return 'not supported by cvs2svn'
-
- def rcsfile(self):
- return self.cvs_rev.cvs_file.basename + ",v"
-
- def revision(self):
- return self.cvs_rev.rev
-
- def source(self):
- project = self.cvs_rev.cvs_file.project
- return project.cvs_repository_root + '/' + project.cvs_module + \
- self.cvs_rev.cvs_file.cvs_path + ",v"
-
- def state(self):
- # We check out only live revisions.
- return 'Exp'
-
-
-class InternalRevisionReader(RevisionReader):
- """A RevisionReader that reads the contents from its own delta store."""
-
- _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
- _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
- _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
-
- def __init__(self, compress):
- self._compress = compress
-
- def register_artifacts(self, which_pass):
- artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
- artifact_manager.register_temp_file_needed(
- config.RCS_DELTAS_STORE, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.RCS_DELTAS_INDEX_TABLE, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.RCS_TREES_FILTERED_STORE, which_pass
- )
- artifact_manager.register_temp_file_needed(
- config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
- )
-
- def start(self):
- self._delta_db = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
- artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
- DB_OPEN_READ)
- self._delta_db.__delitem__ = lambda id: None
- self._tree_db = IndexedDatabase(
- artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
- artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
- DB_OPEN_READ)
- ser = MarshalSerializer()
- if self._compress:
- ser = CompressingSerializer(ser)
- self._co_db = Database(
- artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
- ser)
-
- # The set of CVSFile instances whose TextRecords have already been
- # read:
- self._loaded_files = set()
-
- # The TextRecords that are currently live, backed by the delta and
- # checkout databases:
- self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
-
- def _get_text_record(self, cvs_rev):
- """Return the TextRecord instance for CVS_REV.
-
- If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
- do so now."""
-
- if cvs_rev.cvs_file not in self._loaded_files:
- for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
- self._text_record_db.add(text_record)
- self._loaded_files.add(cvs_rev.cvs_file)
-
- return self._text_record_db[cvs_rev.id]
-
- def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
- """Check out the text for revision CVS_REV from the repository.
-
- Return the text wrapped in a readable file object. If
- SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
- _un_expanded prior to returning the file content. Note that $Log$
- never actually generates a log (which makes test 'requires_cvs()'
- fail).
-
- Revisions may be requested in any order, but if they are not
- requested in dependency order the checkout database will become
- very large. Revisions may be skipped. Each revision may be
- requested only once."""
-
- try:
- text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
- except MalformedDeltaException, (msg):
- raise FatalError('Malformed RCS delta in %s, revision %s: %s'
- % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
- if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
- if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
- text = self._kw_re.sub(r'$\1$', text)
- else:
- text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
-
- return cStringIO.StringIO(text)
-
- def finish(self):
- self._text_record_db.log_leftovers()
-
- del self._text_record_db
- self._delta_db.close()
- self._tree_db.close()
- self._co_db.close()
-