# R overlay -- roverlay package, strutil
# -*- coding: utf-8 -*-
# Copyright (C) 2012 André Erdmann
# Distributed under the terms of the GNU General Public License;
# either version 2 of the License, or (at your option) any later version.

"""provides utility functions for string manipulation"""

__all__ = [
    'ascii_filter', 'bytes_try_decode', 'fix_ebuild_name',
    'pipe_lines', 'shorten_str', 'unquote', 'unquote_all',
    'foreach_str', 'str_to_bool',
]

import re

# encodings tried (in this order) by bytes_try_decode()
_DEFAULT_ENCODINGS = ( 'utf-8', 'ascii', 'iso8859_15', 'utf-16', 'latin_1' )

# any run of '.' and/or ':' chars gets replaced by a single replacement str
_EBUILD_NAME_ILLEGAL_CHARS = re.compile ( "[.:]{1,}" )
_EBUILD_NAME_ILLEGAL_CHARS_REPLACE_BY = '_'

# lowercase words accepted by str_to_bool()
_STR_TRUE_WORDS = frozenset ({ 'y', 'yes', '1', 'true', 'enabled', 'on' })
_STR_FALSE_WORDS = frozenset ({ 'n', 'no', '0', 'false', 'disabled', 'off' })


def fix_ebuild_name ( name ):
    """Removes illegal chars from an ebuild name by replacing them with an
    underscore char '_'. A sequence of consecutive illegal chars ('.', ':')
    is replaced by a single underscore.

    arguments:
    * name -- ebuild name (str)

    returns: string without illegal chars
    """
    return _EBUILD_NAME_ILLEGAL_CHARS.sub (
        _EBUILD_NAME_ILLEGAL_CHARS_REPLACE_BY, name
    )
# --- end of fix_ebuild_name (...) ---


def ascii_filter ( _str, additional_filter=None ):
    """Removes all non-ascii chars from a string and returns the result.

    arguments:
    * _str              -- string to be filtered
    * additional_filter -- a function that is called for each ascii char
                           and returns true if the char is allowed (i.e.,
                           should be kept in the resulting string),
                           else False. Defaults to None, which means
                           "keep all".

    returns: filtered string
    """
    # additional_filter is only consulted for chars that are ascii
    return ''.join (
        c for c in _str
        if ord ( c ) < 128
        and ( additional_filter is None or additional_filter ( c ) )
    )
# --- end of ascii_filter (...) ---


def shorten_str ( s, maxlen, replace_end=None ):
    """Shortens a string s so that it isn't longer than maxlen chars.
    Optionally replaces the end of a shortened string with another string.
    Does nothing if len(s) <= maxlen.

    arguments:
    * s           -- string to shorten
    * maxlen      -- max length of the returned string
    * replace_end -- optional; replace the end of a shortened string by this
                     string (e.g. "abcdefghijk", 6, " (s)" => "ab (s)").
                     Ignored if it is longer than maxlen.

    returns: shortened string
    """
    if len ( s ) <= maxlen:
        return s

    if replace_end is not None:
        # number of chars from s that fit in front of replace_end
        rlen = maxlen - len ( replace_end )
        if rlen >= 0:
            return s[:rlen] + replace_end

    # no replacement given, or it does not fit into maxlen chars
    return s[:maxlen]
# --- end of shorten_str (...) ---


def pipe_lines ( _pipe, use_filter=False, filter_func=None ):
    """Returns text lines read from a pipe.

    arguments:
    * _pipe       -- data read from a pipe; has to provide a decode() method
                     whose result is split at newline chars
    * use_filter  -- whether to use a filter or not. Defaults to False.
    * filter_func -- filter function to use (this can also be 'None',
                     in which case empty lines are dropped)

    returns: text lines (always a list)
    """
    lines = _pipe.decode().split ( '\n' )
    if use_filter:
        # materialize the result so that both branches return a list
        return list ( filter ( filter_func, lines ) )
    return lines
# --- end of pipe_lines (...) ---


def unquote ( _str, keep_going=False ):
    """Removes enclosing quotes from a string.

    arguments:
    * _str       -- string to unquote
    * keep_going -- remove all enclosing quotes ("'"a"'" -> a)

    returns: string without enclosing quotes
    """
    quote_chars = '\"\''
    result = _str

    # a quote pair needs at least two chars, and both ends have to be the
    # same quote char
    while (
        len ( result ) >= 2
        and result[0] == result[-1]
        and result[0] in quote_chars
    ):
        result = result[1:-1]
        if not keep_going:
            break

    return result
# --- end of unquote (...) ---


def unquote_all ( s ):
    """Removes all enclosing quotes from a string.
    Same as unquote ( s, keep_going=True ).

    arguments:
    * s -- string to unquote

    returns: string without enclosing quotes
    """
    return unquote ( s, keep_going=True )
# --- end of unquote_all (...) ---


def bytes_try_decode (
    byte_str,
    encodings=_DEFAULT_ENCODINGS,
    charwise_only=False,
    force_decode=False
):
    """Tries to decode a bytes object to str whose encoding is unknown
    but predictable (with charwise conversion as last resort).
    Returns byte_str if byte_str is already a str and force_decode is False,
    else a decoded str.

    arguments:
    * byte_str      -- bytes object to decode
    * encodings     -- encodings to try (None, str or list/iterable of str)
    * charwise_only -- do charwise conversion only
    * force_decode  -- decode byte_str even if it's already a str

    returns: str
    """
    if isinstance ( byte_str, str ) and not force_decode:
        return byte_str

    if encodings and not charwise_only:
        if isinstance ( encodings, str ):
            try_enc = ( encodings, )
        else:
            try_enc = encodings

        for enc in try_enc:
            try:
                return byte_str.decode ( enc )
            except Exception:
                # best-effort: bad input for this encoding, an unknown
                # encoding or a missing decode() method just means
                # "try the next one". KeyboardInterrupt/SystemExit are
                # not caught.
                continue

    # last resort: charwise conversion. Iterating over bytes yields ints,
    # which are converted with chr(); items that are already str (str input
    # with force_decode=True) are kept as-is.
    return ''.join (
        c if isinstance ( c, str ) else chr ( c ) for c in byte_str
    )
# --- end of bytes_try_decode() ---


def foreach_str ( func, _str ):
    """Calls func for a str or for each item of an iterable of strings.

    arguments:
    * func -- function that accepts a str
    * _str -- str, or any object without an __iter__ attribute (which is
              converted to str and handled as a single value),
              or an iterable whose items are converted to str

    returns: result of func(str(_str)) for a single value, else a list
             of the results of func(str(item)) for each item
    """
    if isinstance ( _str, str ) or not hasattr ( _str, '__iter__' ):
        return func ( str ( _str ) )
    return [ func ( str ( s ) ) for s in _str ]
# --- end of foreach_str (...) ---


def str_to_bool ( s, empty_return=None, nofail=False ):
    """Converts a string to a bool (case-insensitive).

    True words:  y, yes, 1, true, enabled, on
    False words: n, no, 0, false, disabled, off

    arguments:
    * s            -- string to convert
    * empty_return -- value that is returned if s is empty (or None).
                      Defaults to None.
    * nofail       -- return None instead of raising ValueError if s is
                      not a known word. Defaults to False.

    returns: True, False, empty_return or None

    raises: ValueError if s is not a known word and nofail is not set
    """
    if not s:
        return empty_return

    slow = s.lower()
    if slow in _STR_TRUE_WORDS:
        return True
    if slow in _STR_FALSE_WORDS:
        return False
    if nofail:
        return None
    raise ValueError ( s )
# --- end of str_to_bool (...) ---