From e6fcf51f824bd476bec6106b7ad478e73be34cc5 Mon Sep 17 00:00:00 2001 From: Jauhien Piatlicki Date: Fri, 17 Apr 2015 00:23:16 +0200 Subject: change DB structure move to new DB layout with versioning support bson format for category files --- g_sorcery/bson/__init__.py | 1 + g_sorcery/bson/bson.py | 47 +++++ g_sorcery/db_layout.py | 283 ++++++++++++++++++++++++++++ g_sorcery/exceptions.py | 12 +- g_sorcery/fileutils.py | 62 ++++-- g_sorcery/package_db.py | 455 +++++++++++++++++++++------------------------ g_sorcery/serialization.py | 98 ++++++++-- gs_db_tool/gs_db_tool.py | 10 +- scripts/all_pythons.sh | 2 +- setup.py | 29 ++- tests/test_PackageDB.py | 10 +- 11 files changed, 707 insertions(+), 302 deletions(-) create mode 100644 g_sorcery/bson/__init__.py create mode 100644 g_sorcery/bson/bson.py create mode 100644 g_sorcery/db_layout.py diff --git a/g_sorcery/bson/__init__.py b/g_sorcery/bson/__init__.py new file mode 100644 index 0000000..4265cc3 --- /dev/null +++ b/g_sorcery/bson/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python diff --git a/g_sorcery/bson/bson.py b/g_sorcery/bson/bson.py new file mode 100644 index 0000000..fdb8bb9 --- /dev/null +++ b/g_sorcery/bson/bson.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" + bson.py + ~~~~~~~ + + bson file format support + + :copyright: (c) 2015 by Jauhien Piatlicki + :license: GPL-2, see LICENSE for more details. +""" + +import bson + +from g_sorcery.exceptions import FileJSONError +from g_sorcery.fileutils import FileJSONData +from g_sorcery.serialization import from_raw_serializable, to_raw_serializable + +class FileBSON(FileJSONData): + """ + Class for BSON files. Supports custom JSON serialization + provided by g_sorcery.serialization. + """ + def read_content(self): + """ + Read BSON file. + """ + content = {} + bcnt = None + with open(self.path, 'rb') as f: + bcnt = f.read() + if not bcnt: + raise FileJSONError('failed to read: ', self.path) + rawcnt = bson.BSON.decode(bcnt) + content = from_raw_serializable(rawcnt) + return content + + + def write_content(self, content): + """ + Write BSON file. + """ + rawcnt = to_raw_serializable(content) + bcnt = bson.BSON.encode(rawcnt) + with open(self.path, 'wb') as f: + f.write(bcnt) diff --git a/g_sorcery/db_layout.py b/g_sorcery/db_layout.py new file mode 100644 index 0000000..fe6f281 --- /dev/null +++ b/g_sorcery/db_layout.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" + db_layout.py + ~~~~~~~~~~~~ + + package database file layout + + :copyright: (c) 2013-2015 by Jauhien Piatlicki + :license: GPL-2, see LICENSE for more details. +""" + +import hashlib +import os +import shutil + +from .exceptions import DBLayoutError, DBStructureError, FileJSONError, IntegrityError +from .fileutils import FileJSON, hash_file + +CATEGORIES_FILE_NAME = 'categories' +MANIFEST_FILE_NAME = 'manifest' +METADATA_FILE_NAME = 'metadata' +PACKAGES_FILE_NAME = 'packages' + +JSON_FILE_SUFFIX = 'json' +BSON_FILE_SUFFIX = 'bson' + +class CategoryJSON(FileJSON): + """ + Category file in JSON format. + """ + def __init__(self, directory, category): + super(CategoryJSON, self).__init__(os.path.join(os.path.abspath(directory), category), + file_name(PACKAGES_FILE_NAME, JSON_FILE_SUFFIX)) + + +SUPPORTED_FILE_FORMATS = {JSON_FILE_SUFFIX: CategoryJSON} + + +# bson module is optional, we should check if it is installed +try: + from g_sorcery.bson.bson import FileBSON + + class CategoryBSON(FileBSON): + """ + Category file in BSON format. + """ + def __init__(self, directory, category): + super(CategoryBSON, self).__init__(os.path.join(os.path.abspath(directory), category), + file_name(PACKAGES_FILE_NAME, BSON_FILE_SUFFIX)) + + SUPPORTED_FILE_FORMATS[BSON_FILE_SUFFIX] = CategoryBSON + +except ImportError as e: + pass + + +def file_name(name, suffix=JSON_FILE_SUFFIX): + """ + Return file name based on name and suffix. + """ + return name + '.' + suffix + + +class Manifest(FileJSON): + """ + Manifest file. + """ + + def __init__(self, directory): + super(Manifest, self).__init__(os.path.abspath(directory), file_name(MANIFEST_FILE_NAME)) + + def check(self): + """ + Check manifest. + """ + manifest = self.read() + + result = True + errors = [] + + names = [file_name(CATEGORIES_FILE_NAME)] + for name in names: + if not name in manifest: + raise DBLayoutError('Bad manifest: no ' + name + ' entry') + + for name, value in manifest.items(): + if hash_file(os.path.join(self.directory, name), hashlib.md5()) != \ + value: + errors.append(name) + + if errors: + result = False + + return (result, errors) + + def digest(self, mandatory_files): + """ + Generate manifest. + """ + if not file_name(CATEGORIES_FILE_NAME) in mandatory_files: + raise DBLayoutError('Categories file: ' + file_name(CATEGORIES_FILE_NAME) \ + + ' is not in the list of mandatory files') + + categories = Categories(self.directory) + categories = categories.read() + + manifest = {} + + for name in mandatory_files: + manifest[name] = hash_file(os.path.join(self.directory, name), + hashlib.md5()) + + for category in categories: + category_path = os.path.join(self.directory, category) + if not os.path.isdir(category_path): + raise DBStructureError('Empty category: ' + category) + for root, _, files in os.walk(category_path): + for f in files: + manifest[os.path.join(root[len(self.directory)+1:], f)] = \ + hash_file(os.path.join(root, f), hashlib.md5()) + + self.write(manifest) + + +class Metadata(FileJSON): + """ + Metadata file. + """ + def __init__(self, directory): + super(Metadata, self).__init__(os.path.abspath(directory), + file_name(METADATA_FILE_NAME), + ['db_version', 'layout_version', 'category_format']) + + def read(self): + """ + Read metadata file. + + If file doesn't exist, we have a legacy DB + with DB layout v. 0. Fill metadata appropriately. + """ + if not os.path.exists(self.directory): + os.makedirs(self.directory) + content = {} + if not os.path.isfile(self.path): + content = {'db_version': 0, 'layout_version': 0, 'category_format': JSON_FILE_SUFFIX} + else: + content = self.read_content() + for key in self.mandatories: + if not key in content: + raise FileJSONError('lack of mandatory key: ' + key) + + return content + + +class Categories(FileJSON): + """ + Categories file. + """ + def __init__(self, directory): + super(Categories, self).__init__(os.path.abspath(directory), + file_name(CATEGORIES_FILE_NAME)) + + +def get_layout(metadata): + """ + Get layout parameters based on metadata. + """ + layout_version = metadata['layout_version'] + if layout_version == 0: + return (CategoryJSON, [file_name(CATEGORIES_FILE_NAME)]) + elif layout_version == 1: + category_format = metadata['category_format'] + try: + category_cls = SUPPORTED_FILE_FORMATS[category_format] + except KeyError: + raise DBLayoutError("unsupported packages file format: " + category_format) + return (category_cls, [file_name(CATEGORIES_FILE_NAME), file_name(METADATA_FILE_NAME)]) + else: + raise DBLayoutError("unsupported DB layout version: " + layout_version) + + +class DBLayout(object): + """ + Filesystem DB layout. + + Directory layout. + ~~~~~~~~~~~~~~~~~ + + For legacy DB layout v. 0: + + db dir + manifest.json: database manifest + categories.json: information about categories + category1 + packages.json: information about available packages + category2 + ... + + For DB layout v. 1: + + db dir + manifest.json: database manifest + categories.json: information about categories + metadata.json: DB metadata + category1 + packages.[b|j]son: information about available packages + category2 + ... + + Packages file can be in json or bson formats. + """ + + def __init__(self, directory): + self.directory = os.path.abspath(directory) + self.manifest = Manifest(self.directory) + + def check_manifest(self): + """ + Check manifest. + """ + sane, errors = self.manifest.check() + if not sane: + raise IntegrityError('Manifest error: ' + str(errors)) + + def clean(self): + """ + Remove DB files. + """ + if os.path.exists(self.directory): + shutil.rmtree(self.directory) + + def read(self): + """ + Read DB files. + + Returns a tuple with metadata, list of categories + and categories dictionary. + """ + self.check_manifest() + + metadata_f = Metadata(self.directory) + metadata = metadata_f.read() + + category_cls, _ = get_layout(metadata) + + categories_f = Categories(self.directory) + categories = categories_f.read() + + packages = {} + for category in categories: + category_path = os.path.join(self.directory, category) + if not os.path.isdir(category_path): + raise DBLayoutError('Empty category: ' + category) + category_f = category_cls(self.directory, category) + pkgs = category_f.read() + if not pkgs: + raise DBLayoutError('Empty category: ' + category) + packages[category] = pkgs + + return (metadata, categories, packages) + + def write(self, metadata, categories, packages): + """ + Write DB files. + """ + category_cls, mandatory_files = get_layout(metadata) + + self.clean() + + if file_name(METADATA_FILE_NAME) in mandatory_files: + metadata_f = Metadata(self.directory) + metadata_f.write(metadata) + + categories_f = Categories(self.directory) + categories_f.write(categories) + + for category in categories: + category_f = category_cls(self.directory, category) + category_f.write(packages[category]) + + self.manifest.digest(mandatory_files) diff --git a/g_sorcery/exceptions.py b/g_sorcery/exceptions.py index a8d7238..4691ce6 100644 --- a/g_sorcery/exceptions.py +++ b/g_sorcery/exceptions.py @@ -4,10 +4,10 @@ """ exceptions.py ~~~~~~~~~~~~~ - + Exceptions hierarchy - - :copyright: (c) 2013 by Jauhien Piatlicki + + :copyright: (c) 2013-2015 by Jauhien Piatlicki :license: GPL-2, see LICENSE for more details. """ @@ -17,6 +17,9 @@ class GSorceryError(Exception): class DBError(GSorceryError): pass +class DBLayoutError(GSorceryError): + pass + class InvalidKeyError(DBError): pass @@ -49,3 +52,6 @@ class DigestError(GSorceryError): class DownloadingError(GSorceryError): pass + +class SerializationError(GSorceryError): + pass diff --git a/g_sorcery/fileutils.py b/g_sorcery/fileutils.py index 443206c..d783c8a 100644 --- a/g_sorcery/fileutils.py +++ b/g_sorcery/fileutils.py @@ -4,10 +4,10 @@ """ fileutils.py ~~~~~~~~~~~~ - + file utilities - - :copyright: (c) 2013 by Jauhien Piatlicki + + :copyright: (c) 2013-2015 by Jauhien Piatlicki :license: GPL-2, see LICENSE for more details. """ @@ -15,17 +15,15 @@ import glob import json import hashlib import os -import shutil import tarfile from .compatibility import TemporaryDirectory from .exceptions import FileJSONError, DownloadingError from .serialization import JSONSerializer, deserializeHook -class FileJSON(object): +class FileJSONData(object): """ - Class for JSON files. Supports custom JSON serialization - provided by g_sorcery.serialization. + Class for files with JSON compatible data. """ def __init__(self, directory, name, mandatories=None): """ @@ -33,7 +31,6 @@ class FileJSON(object): directory: File directory. name: File name. mandatories: List of requiered keys. - loadconv: Type change values on loading. """ self.directory = os.path.abspath(directory) self.name = name @@ -45,7 +42,7 @@ class FileJSON(object): def read(self): """ - Read JSON file. + Read file. """ if not os.path.exists(self.directory): os.makedirs(self.directory) @@ -53,27 +50,58 @@ class FileJSON(object): if not os.path.isfile(self.path): for key in self.mandatories: content[key] = "" - with open(self.path, 'w') as f: - json.dump(content, f, indent=2, - sort_keys=True, cls=JSONSerializer) + self.write_content(content) else: - with open(self.path, 'r') as f: - content = json.load(f, object_hook=deserializeHook) + content = self.read_content() for key in self.mandatories: if not key in content: raise FileJSONError('lack of mandatory key: ' + key) - + return content + def read_content(self): + """ + Real read operation with deserialization. Should be overridden. + """ + return [] + def write(self, content): """ - Write JSON file. + Write file. """ for key in self.mandatories: if not key in content: raise FileJSONError('lack of mandatory key: ' + key) if not os.path.exists(self.directory): os.makedirs(self.directory) + self.write_content(content) + + def write_content(self, content): + """ + Real write operation with serialization. Should be overridden. + """ + pass + + +class FileJSON(FileJSONData): + """ + Class for JSON files. Supports custom JSON serialization + provided by g_sorcery.serialization. + """ + + def read_content(self): + """ + Read JSON file. + """ + content = {} + with open(self.path, 'r') as f: + content = json.load(f, object_hook=deserializeHook) + return content + + def write_content(self, content): + """ + Write JSON file. + """ with open(self.path, 'w') as f: json.dump(content, f, indent=2, sort_keys=True, cls=JSONSerializer) @@ -149,7 +177,7 @@ class ManifestEntry(object): __slots__ = ('directory', 'name', 'ftype', 'size', 'sha256', 'sha512', 'whirlpool') - + def __init__(self, directory, name, ftype): self.directory = directory self.name = name diff --git a/g_sorcery/package_db.py b/g_sorcery/package_db.py index f19f9d4..5eeeb63 100644 --- a/g_sorcery/package_db.py +++ b/g_sorcery/package_db.py @@ -4,45 +4,46 @@ """ package_db.py ~~~~~~~~~~~~~ - + package database - - :copyright: (c) 2013 by Jauhien Piatlicki + + :copyright: (c) 2013-2015 by Jauhien Piatlicki :license: GPL-2, see LICENSE for more details. """ import glob -import hashlib import os -import shutil -import sys import portage from .compatibility import basestring, py2k, TemporaryDirectory -from .exceptions import DBStructureError, IntegrityError, \ - InvalidKeyError, SyncError -from .fileutils import FileJSON, hash_file, load_remote_file, copy_all, wget +from .db_layout import DBLayout, JSON_FILE_SUFFIX +from .exceptions import DBError, DBStructureError, InvalidKeyError, SyncError +from .fileutils import FileJSON, load_remote_file, copy_all, wget from .g_collections import Package -from .logger import Logger, ProgressBar +from .logger import Logger class PackageDB(object): """ Package database. - Database is a directory and related data structure. - - Directory layout. - ~~~~~~~~~~~~~~~~~ - db dir - manifest.json: database manifest - categories.json: information about categories - category1 - packages.json: information about available packages - category2 - ... - + It uses DBLayout class to manipulate files that + contain DB stored on disk. + + There are two versions of DB layout now: + 0 -- legacy version + 1 -- new version that supports DB structure versioning + + DB structure itself has two versions: + 0 -- legacy version, categories contain dictionary package name: versions dict + 1 -- actual version, corresponds to the DB in memory: + DB is a dictionary with categories as keys. + Each category contains a dictionary with two entries: + common_data -- fields common to all the packages + packages -- dictionary with packages (content of category dictionary in v. 0) + + For DB layout v. 0 only DB structure v. 0 is possible. """ class Iterator(object): @@ -50,68 +51,113 @@ class PackageDB(object): Iterator class over the package database. """ def __init__(self, package_db): - self.pkg_iter = iter(package_db.database.items()) + self.cats_iter = iter(package_db.database.items()) try: - self.pkgname, self.vers_dict = next(self.pkg_iter) + self.cat_name, self.cat_data = next(self.cats_iter) except StopIteration: - self.pkgname, self.vers_dict = None, None - if self.vers_dict: - self.vers_iter = iter(self.vers_dict.items()) - else: - self.vers_iter = None + self.set_to_end() + return + + if not self.cat_data: + self.set_to_end() + return + + self.pkgs_iter = iter(self.cat_data['packages'].items()) + try: + self.pkg_name, self.pkg_data = next(self.pkgs_iter) + except StopIteration: + self.set_to_end() + return + + if not self.pkg_data: + self.set_to_end() + return + + self.vers_iter = iter(self.pkg_data.items()) + + def set_to_end(self): + self.cat_name, self.cat_data = None, None + self.pkgs_iter = None + self.pkg_name, self.pkg_data = None, None + self.vers_iter = None def __iter__(self): return self if py2k: def next(self): - if not self.vers_iter: + if not self.vers_iter or not self.pkgs_iter: raise StopIteration + ver, ebuild_data = None, None while not ver: try: ver, ebuild_data = next(self.vers_iter) except StopIteration: ver, ebuild_data = None, None - if not ver: - self.pkgname, self.vers_dict = next(self.pkg_iter) - self.vers_iter = iter(self.vers_dict.items()) + try: + self.pkg_name, self.pkg_data = next(self.pkgs_iter) + self.vers_iter = iter(self.pkg_data.items()) + except StopIteration: + self.cat_name, self.cat_data = next(self.cats_iter) + self.pkgs_iter = iter(self.cat_data['packages'].items()) + self.pkg_name, self.pkg_data = next(self.pkgs_iter) + self.vers_iter = iter(self.pkg_data.items()) + + ebuild_data.update(self.cat_data['common_data']) + return (Package(self.cat_name, self.pkg_name, ver), ebuild_data) - category, name = self.pkgname.split('/') - return (Package(category, name, ver), ebuild_data) else: def __next__(self): - if not self.vers_iter: + if not self.vers_iter or not self.pkgs_iter: raise StopIteration + ver, ebuild_data = None, None while not ver: try: ver, ebuild_data = next(self.vers_iter) except StopIteration: ver, ebuild_data = None, None - if not ver: - self.pkgname, self.vers_dict = next(self.pkg_iter) - self.vers_iter = iter(self.vers_dict.items()) + try: + self.pkg_name, self.pkg_data = next(self.pkgs_iter) + self.vers_iter = iter(self.pkg_data.items()) + except StopIteration: + self.cat_name, self.cat_data = next(self.cats_iter) + self.pkgs_iter = iter(self.cat_data['packages'].items()) + self.pkg_name, self.pkg_data = next(self.pkgs_iter) + self.vers_iter = iter(self.pkg_data.items()) - category, name = self.pkgname.split('/') - return (Package(category, name, ver), ebuild_data) + ebuild_data.update(self.cat_data['common_data']) + return (Package(self.cat_name, self.pkg_name, ver), ebuild_data) - def __init__(self, directory): - """ - Args: - directory: database directory. - """ + def __init__(self, directory, + preferred_layout_version=1, + preferred_db_version=1, + preferred_category_format=JSON_FILE_SUFFIX): + + if preferred_layout_version == 0 \ + and preferred_db_version != 0: + raise DBStructureError("Wrong DB version: " + preferred_db_version + \ + ", with DB layout version 0 it can be only 0") + + if not preferred_db_version in [0, 1]: + raise DBStructureError("Unsupported DB version: " + preferred_db_version) + self.logger = Logger() - self.CATEGORIES_NAME = 'categories.json' - self.PACKAGES_NAME = 'packages.json' self.directory = os.path.abspath(directory) + self.preferred_layout_version = preferred_layout_version + self.preferred_db_version = preferred_db_version + self.preferred_category_format = preferred_category_format + self.db_layout = DBLayout(self.directory) self.reset_db() + def __iter__(self): - return(PackageDB.Iterator(self)) + return PackageDB.Iterator(self) + def reset_db(self): """ @@ -120,6 +166,7 @@ class PackageDB(object): self.database = {} self.categories = {} + def sync(self, db_uri): """ Synchronize local database with remote database. @@ -131,7 +178,7 @@ class PackageDB(object): download_dir = TemporaryDirectory() if wget(real_db_uri, download_dir.name): raise SyncError('sync failed: ' + real_db_uri) - + temp_dir = TemporaryDirectory() for f_name in glob.iglob(os.path.join(download_dir.name, '*.tar.gz')): self.logger.info("unpacking " + f_name) @@ -140,19 +187,18 @@ class PackageDB(object): tempdb_dir = os.path.join(temp_dir.name, os.listdir(temp_dir.name)[0]) tempdb = PackageDB(tempdb_dir) - if not tempdb.check_manifest()[0]: - raise IntegrityError('Manifest check failed.') + tempdb.db_layout.check_manifest() self.logger.info("copy files to an actual database") self.clean() copy_all(tempdb_dir, self.directory) - - if not self.check_manifest()[0]: - raise IntegrityError('Manifest check failed, db inconsistent.') - + + self.db_layout.check_manifest() + del download_dir del temp_dir + def get_real_db_uri(self, db_uri): """ Convert self.db_uri to URI where remote database can be @@ -162,196 +208,61 @@ class PackageDB(object): URI of remote database file. """ return db_uri - - def manifest(self): - """ - Generate database manifest. - """ - categories = FileJSON(self.directory, self.CATEGORIES_NAME, []) - categories = categories.read() - manifest = {} - names = [self.CATEGORIES_NAME] - for name in names: - manifest[name] = hash_file(os.path.join(self.directory, name), - hashlib.md5()) - for category in categories: - category_path = os.path.join(self.directory, category) - if not os.path.isdir(category_path): - raise DBStructureError('Empty category: ' + category) - for root, dirs, files in os.walk(category_path): - for f in files: - manifest[os.path.join(root[len(self.directory)+1:], f)] = \ - hash_file(os.path.join(root, f), hashlib.md5()) - m_f = FileJSON(self.directory, 'manifest.json', []) - m_f.write(manifest) - - def check_manifest(self): - """ - Check database manifest. - Returns: - Tuple with first element containing result of manifest check - as boolean and second element containing list of files with errors. - """ - self.logger.info("checking manifest") - m_f = FileJSON(self.directory, 'manifest.json', []) - manifest = m_f.read() - - result = True - errors = [] - - names = [self.CATEGORIES_NAME] - for name in names: - if not name in manifest: - raise DBStructureError('Bad manifest: no ' + name + ' entry') - - for name, value in manifest.items(): - if hash_file(os.path.join(self.directory, name), hashlib.md5()) != \ - value: - errors.append(name) - - if errors: - result = False - - return (result, errors) def clean(self): """ Clean database. """ - if os.path.exists(self.directory): - shutil.rmtree(self.directory) + self.db_layout.clean() self.reset_db() - self.write_and_manifest() - - def write_and_manifest(self): - """ - Write and digest database. - """ self.write() - self.manifest() + def write(self): """ - Write database. + Write and digest database. """ - categories_f = FileJSON(self.directory, self.CATEGORIES_NAME, []) - categories_f.write(self.categories) - if self.database: - self.logger.info("writing database") + self.logger.info("writing database...") + + metadata = {'db_version': self.preferred_db_version, + 'layout_version': self.preferred_layout_version, + 'category_format': self.preferred_category_format} + + if self.preferred_db_version == 0: + packages = dict(self.database) + for category, cat_data in packages.items(): + for _, versions in cat_data['packages'].items(): + for version, ebuild_data in versions.items(): + ebuild_data.update(cat_data['common_data']) + packages[category] = cat_data['packages'] + else: + packages = dict(self.database) - progress_bar = ProgressBar(20, len(list(self.database))) - if self.database: - progress_bar.begin() - - categories_content = {} - for category in self.categories: - categories_content[category] = {} - - for pkgname, versions in self.database.items(): - category, name = pkgname.split('/') - if not category or (not category in self.categories): - raise DBStructureError('Non existent: ' + category) - categories_content[category][name] = {} - for version, content in versions.items(): - categories_content[category][name][version] = content - self.additional_write_version(category, name, version) - self.additional_write_package(category, name) - progress_bar.increment() - - for category in self.categories: - f = FileJSON(os.path.join(self.directory, category), self.PACKAGES_NAME, []) - f.write(categories_content[category]) - self.additional_write_category(category) - - self.additional_write() + self.db_layout.write(metadata, self.categories, packages) if self.database: - progress_bar.end() - print("") - - def additional_write_version(self, category, package, version): - """ - Hook to be overrided. - """ - pass - - def additional_write_package(self, category, package): - """ - Hook to be overrided. - """ - pass - - def additional_write_category(self, category): - """ - Hook to be overrided. - """ - pass + self.logger.info("database written") - def additional_write(self): - """ - Hook to be overrided. - """ - pass def read(self): """ Read database. """ - sane, errors = self.check_manifest() - if not sane: - raise IntegrityError('Manifest error: ' + str(errors)) - categories_f = FileJSON(self.directory, self.CATEGORIES_NAME, []) - self.categories = categories_f.read() - for category in self.categories: - category_path = os.path.join(self.directory, category) - if not os.path.isdir(category_path): - raise DBStructureError('Empty category: ' + category) - - f = FileJSON(category_path, self.PACKAGES_NAME, []) - packages = f.read() - if not packages: - raise DBStructureError('Empty category: ' + category) - - for name, versions in packages.items(): - - if not versions: - error_msg = 'Empty package: ' + category + '/' + name - raise DBStructureError(error_msg) - - pkgname = category + '/' + name - self.database[pkgname] = versions - for version in versions: - self.additional_read_version(category, name, version) - self.additional_read_package(category, name) - self.additional_read_category(category) - self.additional_read() + metadata, self.categories, packages = self.db_layout.read() - def additional_read_version(self, category, package, version): - """ - Hook to be overrided. - """ - pass + db_version = metadata['db_version'] + self.database = packages + if db_version == 0: + for category, cat_data in self.database.items(): + self.database[category] = {'common_data': {}, 'packages': cat_data} + elif db_version == 1: + pass + else: + raise DBStructureError("Unsupported DB version: " + db_version) - def additional_read_package(self, category, package): - """ - Hook to be overrided. - """ - pass - def additional_read_category(self, category): - """ - Hook to be overrided. - """ - pass - - def additional_read(self): - """ - Hook to be overrided. - """ - pass - def add_category(self, category, description=None): """ Add a category. @@ -364,6 +275,7 @@ class PackageDB(object): description = {} self.categories[category] = description + def add_package(self, package, ebuild_data=None): """ Add a package. @@ -374,15 +286,25 @@ class PackageDB(object): """ if not ebuild_data: ebuild_data = {} + category = package.category name = package.name version = package.version - pkgname = category + '/' + name - if category and not category in self.categories: + + if not category or not name or not version: + raise DBError("wrong package: " + str(package)) + + if not category in self.categories: raise InvalidKeyError('Non-existent category: ' + category) - if pkgname and not pkgname in self.database: - self.database[pkgname] = {} - self.database[pkgname][version] = ebuild_data + + if not category in self.database: + self.database[category] = {'common_data': {}, 'packages': {}} + + if not name in self.database[category]['packages']: + self.database[category]['packages'][name] = {} + + self.database[category]['packages'][name][version] = ebuild_data + def list_categories(self): """ @@ -393,6 +315,7 @@ class PackageDB(object): """ return list(self.categories) + def in_category(self, category, name): """ Tests whether a package is in a given category. @@ -406,7 +329,12 @@ class PackageDB(object): """ if not category or (not category in self.categories): raise InvalidKeyError('No such category: ' + category) - return (category + '/' + name) in self.database + + if not category in self.database: + return False + + return name in self.database[category]['packages'] + def list_package_names(self, category): """ @@ -420,9 +348,12 @@ class PackageDB(object): """ if not category or (not category in self.categories): raise InvalidKeyError('No such category: ' + category) - res = [x.split('/')[1] for x in self.database - if x.split('/')[0] == category] - return res + + if not category in self.database: + return [] + + return list(self.database[category]['packages']) + def list_catpkg_names(self): """ @@ -431,7 +362,12 @@ class PackageDB(object): Returns: List with category/package entries. """ - return list(self.database) + result = [] + for category, cat_data in self.database.items(): + for name in cat_data['packages']: + result.append(category + '/' + name) + return result + def list_package_versions(self, category, name): """ @@ -446,10 +382,13 @@ class PackageDB(object): """ if not category or (not category in self.categories): raise InvalidKeyError('No such category: ' + category) - pkgname = category + '/' + name - if not pkgname in self.database: - raise InvalidKeyError('No such package: ' + pkgname) - return list(self.database[pkgname]) + + if not category in self.database \ + or not name in self.database[category]['packages']: + raise InvalidKeyError('No such package: ' + category + '/' + name) + + return list(self.database[category]['packages'][name]) + def list_all_packages(self): """ @@ -459,12 +398,13 @@ class PackageDB(object): List of package_db.Package instances. """ result = [] - for pkgname, versions in self.database.items(): - for version in versions: - category, name = pkgname.split('/') - result.append(Package(category, name, version)) + for category, cat_data in self.database.items(): + for name, versions in cat_data['packages'].items(): + for version in versions: + result.append(Package(category, name, version)) return result + def get_package_description(self, package): """ Get package ebuild data. @@ -476,8 +416,11 @@ class PackageDB(object): Dictionary with package ebuild data. """ #a possible exception should be catched in the caller - return self.database[package.category \ - + '/' + package.name][package.version] + desc = dict(self.database[package.category]['packages']\ + [package.name][package.version]) + desc.update(self.database[package.category]['common_data']) + return desc + def get_max_version(self, category, name): """ @@ -490,10 +433,15 @@ class PackageDB(object): Returns: The recent version of a package. """ + if not category or (not category in self.categories): + raise InvalidKeyError('No such category: ' + category) + + if not category in self.database \ + or not name in self.database[category]['packages']: + raise InvalidKeyError('No such package: ' + category + '/' + name) + pkgname = category + '/' + name - if not pkgname in self.database: - raise InvalidKeyError('No such package: ' + pkgname) - versions = list(self.database[pkgname]) + versions = list(self.database[category]['packages'][name]) max_ver = versions[0] for version in versions[1:]: if portage.pkgcmp(portage.pkgsplit(pkgname + '-' + version), @@ -510,8 +458,15 @@ class DBGenerator(object): __slots__ = ('package_db_class') - def __init__(self, package_db_class=PackageDB): + def __init__(self, package_db_class=PackageDB, + preferred_layout_version=1, + preferred_db_version=1, + preferred_category_format=JSON_FILE_SUFFIX): self.package_db_class = package_db_class + self.preferred_layout_version = preferred_layout_version + self.preferred_db_version = preferred_db_version + self.preferred_category_format = preferred_category_format + def __call__(self, directory, repository, common_config=None, config=None, generate=True): @@ -539,7 +494,10 @@ class DBGenerator(object): Package database. """ db_path = os.path.join(directory, repository, "db") - pkg_db = self.package_db_class(db_path) + pkg_db = self.package_db_class(db_path, + preferred_layout_version=self.preferred_layout_version, + preferred_db_version=self.preferred_db_version, + preferred_category_format=self.preferred_category_format) config_f = FileJSON(os.path.join(directory, repository), "config.json", []) @@ -557,9 +515,10 @@ class DBGenerator(object): if generate: pkg_db.clean() self.generate_tree(pkg_db, common_config, config) - pkg_db.write_and_manifest() + pkg_db.write() return pkg_db + def generate_tree(self, pkg_db, common_config, config): """ Generate package entries. @@ -739,7 +698,7 @@ class DBGenerator(object): Hook to convert external dependencies. """ return dependency - + def in_config(self, configs, list_name, value): """ Check whether value is in config. diff --git a/g_sorcery/serialization.py b/g_sorcery/serialization.py index e051596..780de6f 100644 --- a/g_sorcery/serialization.py +++ b/g_sorcery/serialization.py @@ -4,38 +4,100 @@ """ serialization.py ~~~~~~~~~~~~~~~~ - + json serialization - - :copyright: (c) 2013 by Jauhien Piatlicki + + :copyright: (c) 2013-2015 by Jauhien Piatlicki :license: GPL-2, see LICENSE for more details. """ import json import importlib +from .exceptions import SerializationError + +def step_to_raw_serializable(obj): + """ + Make one step of convertion of object + to the type that is serializable + by the json library. + + None return value signifies an error. + """ + if hasattr(obj, "serialize"): + if hasattr(obj, "deserialize"): + module = obj.__class__.__module__ + name = obj.__class__.__name__ + value = obj.serialize() + return {"python_module" : module, + "python_class" : name, + "value" : value} + else: + return obj.serialize() + return None + + +def to_raw_serializable(obj): + """ + Convert object to the raw serializable type. + Logic is the same as in the standard json encoder. + """ + if isinstance(obj, str) \ + or obj is None \ + or obj is True \ + or obj is False \ + or isinstance(obj, int) \ + or isinstance(obj, float) \ + or isinstance(obj, (list, tuple)) \ + or isinstance(obj, dict): + return obj + else: + sobj = step_to_raw_serializable(obj) + if not sobj: + raise SerializationError('Non serializable object: ', sobj) + return to_raw_serializable(sobj) + + +def step_from_raw_serializable(sobj): + """ + Make one step of building of object from the + raw json serializable type. + """ + if "python_class" in sobj: + module = importlib.import_module(sobj["python_module"]) + cls = getattr(module, sobj["python_class"]) + return cls.deserialize(sobj["value"]) + return sobj + + +def from_raw_serializable(sobj): + """ + Build object from the raw serializable object. + """ + if isinstance(sobj, dict): + res = {k: from_raw_serializable(v) for k, v in sobj.items()} + return step_from_raw_serializable(res) + elif isinstance(sobj, list): + return [from_raw_serializable(item) for item in sobj] + else: + return sobj + class JSONSerializer(json.JSONEncoder): """ Custom JSON encoder. Each serializable class should have a method serialize - that returns JSON serializable value. If class addfitionally + that returns JSON serializable value. If class additionally has a classmethod deserialize that it can be deserialized and additional metainformation is added to the resulting JSON. """ def default(self, obj): - if hasattr(obj, "serialize"): - if hasattr(obj, "deserialize"): - module = obj.__class__.__module__ - name = obj.__class__.__name__ - value = obj.serialize() - return {"python_module" : module, - "python_class" : name, - "value" : value} - else: - return obj.serialize() - return json.JSONEncoder.default(self, obj) + res = step_to_raw_serializable(obj) + if res: + return res + else: + return json.JSONEncoder.default(self, obj) def deserializeHook(json_object): @@ -46,8 +108,4 @@ def deserializeHook(json_object): that takes value (previously returned by serialize method) and transforms it into class instance. """ - if "python_class" in json_object: - module = importlib.import_module(json_object["python_module"]) - cls = getattr(module, json_object["python_class"]) - return cls.deserialize(json_object["value"]) - return json_object + return step_from_raw_serializable(json_object) diff --git a/gs_db_tool/gs_db_tool.py b/gs_db_tool/gs_db_tool.py index f949eae..d9087b8 100644 --- a/gs_db_tool/gs_db_tool.py +++ b/gs_db_tool/gs_db_tool.py @@ -4,10 +4,10 @@ """ gs_db_tool.py ~~~~~~~~~~~~~ - - CLI to manipulate with package DB - - :copyright: (c) 2013 by Jauhien Piatlicki + + CLI to manipulate package DB + + :copyright: (c) 2013-2015 by Jauhien Piatlicki :license: GPL-2, see LICENSE for more details. """ @@ -61,7 +61,7 @@ def transform_db(function): def transformator(pkg_db, args): pkg_db.read() function(pkg_db, args) - pkg_db.write_and_manifest() + pkg_db.write() return transformator diff --git a/scripts/all_pythons.sh b/scripts/all_pythons.sh index 9160dd0..af4c1f1 100755 --- a/scripts/all_pythons.sh +++ b/scripts/all_pythons.sh @@ -2,7 +2,7 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -for VER in 2.7 3.2 3.3 +for VER in 2.7 3.3 3.4 do echo echo "testing python${VER}" diff --git a/setup.py b/setup.py index 015ec76..866a38f 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,38 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" + setup.py + ~~~~~~~~ + + installation script + + :copyright: (c) 2013-2015 by Jauhien Piatlicki + :license: GPL-2, see LICENSE for more details. +""" + +import os from distutils.core import setup +SELECTABLE = ['bson'] + +use_defaults = ' '.join(list(SELECTABLE)) +USE = os.environ.get("USE", use_defaults).split() + +optional_modules = [] +for mod in SELECTABLE: + if mod in USE: + optional_modules.append('g_sorcery.%s' % mod) + setup(name = 'g-sorcery', - version = '0.1', + version = '0.2', description = 'framework for automated ebuild generators', author = 'Jauhien Piatlicki', author_email = 'jauhien@gentoo.org', - packages = ['g_sorcery', 'gs_db_tool'], + packages = ['g_sorcery', 'gs_db_tool'] + optional_modules, package_data = {'g_sorcery': ['data/*']}, scripts = ['bin/g-sorcery', 'bin/gs-db-tool'], data_files = [('/etc/g-sorcery/', ['g-sorcery.cfg'])], - license = 'GPL', + license = 'GPL-2', ) diff --git a/tests/test_PackageDB.py b/tests/test_PackageDB.py index 373fa4b..f73f006 100644 --- a/tests/test_PackageDB.py +++ b/tests/test_PackageDB.py @@ -4,10 +4,10 @@ """ test_PackageDB.py ~~~~~~~~~~~~~~~~ - + PackageDB test suite - - :copyright: (c) 2013 by Jauhien Piatlicki + + :copyright: (c) 2013-2015 by Jauhien Piatlicki :license: GPL-2, see LICENSE for more details. """ @@ -44,7 +44,7 @@ class TestPackageDB(BaseTest): for package in packages: orig_db.add_package(package, ebuild_data) - orig_db.write_and_manifest() + orig_db.write() os.system("cd " + orig_tempdir.name + " && tar cvzf good.tar.gz db") os.system("echo invalid >> " + orig_tempdir.name + "/db/app-test1/packages.json") os.system("cd " + orig_tempdir.name + " && tar cvzf dummy.tar.gz db") @@ -82,7 +82,7 @@ class TestPackageDB(BaseTest): self.assertEqual(data, ebuild_data) pkg_set.remove(package) self.assertTrue(not pkg_set) - + def suite(): suite = unittest.TestSuite() -- cgit v1.2.3-65-gdbad