summaryrefslogtreecommitdiff
blob: 8e8f8408da01687e2584ce3075c5878bc73082e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
import re, os
from ebuild import *
from filetricks import *
from settings import *

#figure out the location of the portage tree, or return it if we already know it
portage_location=None
def portage_dir():
    """Return the location of the portage tree, deducing it on first use.

    The answer is cached in the module-level ``portage_location`` so the
    configuration files are only inspected once. Sources are tried in this
    order, and only the first one that exists is consulted:

    1. ``/etc/paludis/repositories/gentoo.conf``: the value of the line
       starting with ``location`` (the last such line wins), with
       ``${...}`` variable references removed.
    2. ``/etc/make.conf``: the first ``PORTDIR=`` assignment, falling back
       to ``/usr/portage`` when the file has none.
    3. ``/usr/portage`` if that path exists.

    Raises RuntimeError when no location could be deduced, including the
    case of a paludis configuration that has no ``location`` line.
    """
    global portage_location
    if portage_location is not None: #we already know
        return portage_location
    location=None
    if os.path.exists('/etc/paludis/repositories/gentoo.conf'): #paludis stores repository data here
        with open('/etc/paludis/repositories/gentoo.conf') as conffile:
            for line in conffile:
                if line.startswith('location'):
                    #remove ${ROOT} and other variables instead of expanding them
                    location=re.sub(r'\$\{.+?\}','',line[line.find('=')+1:]).strip()
    elif os.path.exists('/etc/make.conf'): #portage stores portage location here
        location='/usr/portage' #used when make.conf does not set PORTDIR
        with open('/etc/make.conf') as conffile:
            for line in conffile: #find PORTDIR="blabla" line
                #group 2 is the value, with an optional surrounding double quote removed
                find_portdir=re.match('\\s*PORTDIR\\s*=\\s*("?)(.*)"?\\1\\s*',line)
                if find_portdir:
                    location=find_portdir.group(2).strip()
                    break
    elif os.path.exists('/usr/portage'): #default location
        location='/usr/portage'
    if location is None: #nothing usable found; do not cache or return None
        raise RuntimeError('Could not deduce portage location')
    portage_location=location
    return portage_location

#list packages in the portage tree
def portage_possible_deps(_portage_packages={}):
    """Map each package name in the portage tree to its qualified names.

    Returns a dict whose keys are bare package names and whose values are
    lists of ``category/package`` strings, one per category that contains a
    directory of that name.

    The mutable default argument is deliberate: it acts as a cache shared
    between calls, so the tree is only scanned while the dict is still
    empty. A non-empty dict passed in by the caller is returned unchanged.

    Categories are read from ``profiles/categories`` under portage_dir().
    Blank lines in that file and categories without a directory in the tree
    are skipped.
    """
    if _portage_packages: return _portage_packages
    portdir=portage_dir()
    with open(os.path.join(portdir,'profiles','categories')) as categories_file:
        categories=[line.strip() for line in categories_file]
    for category in categories:
        category_dir=os.path.join(portdir,category)
        #an empty category name would make us list the tree root itself
        if not category or not os.path.isdir(category_dir):
            continue
        for package in os.listdir(category_dir):
            if package[0]=='.': #exclude hidden dirs
                continue
            if not os.path.isdir(os.path.join(category_dir,package)):
                continue
            #a package name may exist in several categories; keep all of them
            _portage_packages.setdefault(package,[]).append(category+'/'+package)
    return _portage_packages


def pmsify_license_field(license_list):
    """Translate a license field into license names found in the portage tree.

    ``license_list`` is a string of alternatives separated by ``|``. Each
    alternative is stripped of surrounding whitespace and then handled as
    follows:

    * alternatives starting with ``file`` are skipped;
    * ``Name (... version)`` is looked up as ``Name-version``;
    * anything else is looked up under its own name, then as ``name-1``;
    * alternatives matching nothing in the ``licenses`` directory of the
      portage tree are reported as ``as-is``.

    Returns a list of license name strings, one per alternative kept.
    """
    portdir=portage_dir()
    available_licenses=set(os.listdir(os.path.join(portdir,'licenses')))
    licenses=[]
    for license_part in license_list.split('|'): #process each license option
        #strip, otherwise "GPL-2 " (as found before a '|') never matches "GPL-2"
        license_part=license_part.strip()
        if not license_part:
            continue
        if license_part[:4]=='file': #license field refers to a file
            #we'll have to somehow install this file...
            continue
        parse=re.match(r'(.+)\(\W*([^()]+?)\)',license_part)
        if parse: #versioned license
            license_name=parse.group(1).strip()
            version=parse.group(2).strip()
            try_license=license_name+'-'+version
        else: #perhaps we just want version 1?
            try_license=license_part+'-1'
        #prefer the exact name over the guessed versioned name
        if license_part in available_licenses:
            licenses.append(license_part)
        elif try_license in available_licenses:
            licenses.append(try_license)
        else:
            licenses.append('as-is') #unknown
    return licenses

def pmsify_package_name(name):
    """Turn an arbitrary package name into one acceptable as an ebuild name.

    Characters outside ``a-zA-Z0-9+_-`` are dropped. An underscore is
    prepended when the result does not start with one of ``a-zA-Z0-9+_``,
    and an underscore is appended when the result contains a hyphen that is
    directly followed by a digit.

    Raises RuntimeError when ``name`` is empty on entry.
    """
    if len(name)<1:
        raise RuntimeError('Empty package name')
    #keep only the characters a package name may contain
    cleaned=re.sub('[^a-zA-Z0-9+_-]','',name)
    #package name must start with [a-zA-Z0-9+_]
    starts_properly=re.match('[a-zA-Z0-9+_].*',cleaned)
    if starts_properly is None:
        cleaned='_'+cleaned
    #guard against a "-<digits>" part being read as a version
    looks_versioned=re.match('.*-[0-9]+',cleaned)
    if looks_versioned is not None:
        cleaned=cleaned+'_'
    return cleaned

def pmsify_package_version(version_str):
    """Return ``version_str`` with every hyphen replaced by a dot.

    Some CRAN-style versions have the form 0.1-1, which becomes 0.1.1.
    """
    components=version_str.split('-')
    return '.'.join(components)

def listify_package_list(package_list):
    """Split a dependency string into a list of bare package names.

    ``package_list`` is a string such as ``"R (>= 2.10), MASS, lattice"``.
    Entries are separated by commas; an optional parenthesised version
    matcher after a name is recognised and discarded.

    Returns the names in order of appearance, stripped of surrounding
    whitespace. Whitespace-only entries (for example after a trailing
    ``", "``) are dropped.
    """
    #each match is a (name, version matcher, terminator) tuple
    matches=re.findall(r'\s*([^,)(]+?)\s*(\([^()]+?\))?\s*([,)(]|$)',package_list)
    packages=[]
    for package_part in matches: #process each package match
        #the regex can backtrack into a name made only of whitespace; strip it away
        package_name=package_part[0].strip()
        if package_name: #if we see a package name
            packages.append(package_name)
    return packages

#what core packages are installed by the R package in portage?
R_core_packages=[
    'base','datasets','grDevices','graphics','grid','methods',
    'splines','stats','stats4','tcltk','tools','utils',
]
R_recommended_packages=[
    'KernSmooth','MASS','Matrix','base','boot','class','cluster','codetools',
    'datasets','foreign','grDevices','graphics','grid','lattice','methods',
    'mgcv','nlme','nnet','rpart','spatial','splines','stats','stats4',
    'survival','tcltk','tools','utils',
]
#parse dependency list
def pmsify_package_list(package_list, cran_packages):
    """Translate bare dependency names into qualified package atoms.

    ``package_list`` is an iterable of package names and ``cran_packages``
    is a container of the names available in the repository being
    processed. Each name is resolved by the first rule that applies:

    1. ``tcltk`` becomes ``dev-lang/R[tk]``;
    2. a name in R_core_packages becomes ``dev-lang/R``;
    3. a name in R_recommended_packages becomes ``dev-lang/R[-minimal]``;
    4. a name in ``cran_packages`` becomes ``dev-R/<pmsified name>``;
    5. a name known to the portage tree becomes its first qualified name;
    6. anything else falls back to ``dev-R/<pmsified name>``, assuming it
       lives in another R package repository.

    Returns the list of atoms in the same order as ``package_list``.
    """
    tree_packages=portage_possible_deps()

    def qualify(dep_name):
        if dep_name=='tcltk':
            return 'dev-lang/R[tk]'
        if dep_name in R_core_packages:
            return 'dev-lang/R'
        if dep_name in R_recommended_packages:
            return 'dev-lang/R[-minimal]'
        if dep_name not in cran_packages and dep_name in tree_packages:
            return tree_packages[dep_name][0] #get qualified package name from the portage tree
        #either a package of this repository or, as a fallback, of another R repository
        return 'dev-R/'+pmsify_package_name(dep_name)

    return [qualify(dep_name) for dep_name in package_list]

#Parse package data loaded from the PACKAGES file
#into values we can work with in accordance with PMS
def pmsify_package_data(data,remote_repository):
    """Build an Ebuild object from one record of the PACKAGES file.

    ``data`` is a dict of the record's fields, read with the keys
    ``package``, ``version``, ``depends``, ``imports``, ``url``, ``title``,
    ``description`` and ``license``. ``remote_repository`` is the base URI
    of the repository; it is used to build ``src_uri`` and as the homepage
    when ``data`` contains no usable URL.

    The returned object has ``cran_data`` set to ``data`` and its
    ``ebuild_vars`` filled in. ``depend`` and ``rdepend`` hold the same list
    of bare (not yet qualified) dependency names.

    Raises RuntimeError when ``data`` has no ``package`` entry.
    """
    pms_pkg=Ebuild()
    pms_pkg.cran_data=data
    e_vars=pms_pkg.ebuild_vars
    #fix settings:
    if 'package' not in data:
        print(data) #dump the offending record before giving up
        raise RuntimeError("No package name")
    e_vars['pn']=pmsify_package_name(data['package'])
    if 'version' in data:
        e_vars['pv']=pmsify_package_version(data['version'])
    else: #set a version even if we have none
        e_vars['pv']='0'
    if 'depends' in data:
        deps=listify_package_list(data['depends'])
    else: #some packages don't set dependencies, so force dependency on R
        deps=['R',]

    if 'imports' in data:
        deps=deps+listify_package_list(data['imports'])

    e_vars['depend']=deps
    e_vars['pdepend']=[]
    e_vars['rdepend']=deps

    e_vars['iuse']="doc"
    #generic URI matcher: scheme, optional //userinfo@host:port, path, ?query, #fragment
    uri_regex=r"([a-z0-9+.-]+):(?://(?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9A-F]{2})*)@)?((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9A-F]{2})*)(?::(\d*))?(/(?:[a-z0-9-._~!$&'()*+,;=:@/]|%[0-9A-F]{2})*)?|(/?(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9A-F]{2})+(?:[a-z0-9-._~!$&'()*+,;=:@/]|%[0-9A-F]{2})*)?)(?:\?((?:[a-z0-9-._~!$&'()*+,;=:/?@]|%[0-9A-F]{2})*))?(?:#((?:[a-z0-9-._~!$&'()*+,;=:/?@]|%[0-9A-F]{2})*))?"
    #the pattern only lists lower-case letters, so match case-insensitively;
    #otherwise a URL is cut off at its first upper-case character
    first_url=re.search(uri_regex,data.get('url') or '',re.IGNORECASE)
    if first_url:
        e_vars['homepage']=first_url.group(0)
    else: #no url field, or nothing in it that looks like a URI
        e_vars['homepage']=remote_repository
    e_vars['keywords']="~x86 ~amd64"
    if 'title' in data:
        e_vars['description']=data['title'].strip().replace('\n',' ')
    elif 'description' in data:
        e_vars['description']=data['description'].split('. ')[0].strip().replace('\n',' ') #first sentence
    else:
        e_vars['description']=e_vars['pn']
    if 'license' in data:
        e_vars['license']=pmsify_license_field(data['license'])

    tarball=data['package']
    if 'version' in data:
        tarball=tarball+'_'+data['version']
    #NOTE(review): the unversioned tarball name is a best-effort guess for records
    #without a version; it replaces a KeyError - confirm against the repository layout
    e_vars['src_uri']=remote_repository+'/src/contrib/'+tarball+'.tar.gz'
    return pms_pkg

#read sync'ed CRAN data into RAM to PMS-style packages
def read_packages(package_filename,local_repository):
    """Read a PACKAGES file and return its records as PMS-style packages.

    ``package_filename`` is the path of the PACKAGES file and
    ``local_repository`` the directory of the local repository; the remote
    base URI is read from the ``remote_uri`` file in its ``REPO_MYDIR``
    subdirectory.

    Every non-empty record is converted with pmsify_package_data(). Once all
    records are known, the ``depend`` and ``rdepend`` entries of each package
    are replaced by the qualified names from pmsify_package_list().

    Returns the values of a dict keyed by package name, so a later record
    with the same name replaces an earlier one.
    """
    import rfc822
    packages={}
    with open(package_filename,"r") as packages_file:
        file_parts=EmptyLinesFile(packages_file) #this is where we split the PACKAGES file into several file parts
        with open(os.path.join(local_repository,REPO_MYDIR,'remote_uri'),'r') as repository_file:
            remote_uri=repository_file.read().strip()
        #read PACKAGES file
        while not file_parts.eof:
            cran_package=dict(rfc822.Message(file_parts).items()) #read part of PACKAGES file
            if cran_package:
                pms_package=pmsify_package_data(cran_package,remote_uri) #fix values
                packages[pms_package.cran_data['package']]=pms_package #store in dict
    #post-process dependency data, transform to qualified package names
    cran_packages=set(packages) #set, because it is searched once per dependency
    for package in packages.values():
        deps=pmsify_package_list(package.ebuild_vars['depend'],cran_packages)
        package.ebuild_vars['depend']=deps
        package.ebuild_vars['rdepend']=deps
    return packages.values()

#find a package in this repository... we really should cache this
#into some sqlite database once
def find_package(repo_location,package_name):
    """Return the package of this repository whose ``pn`` is ``package_name``.

    The PACKAGES file in the ``REPO_MYDIR`` subdirectory of
    ``repo_location`` is re-read through read_packages() on every call, and
    the first matching package is returned.

    Raises ValueError when no package matches.
    """
    packages_path=os.path.join(repo_location,REPO_MYDIR,'PACKAGES')
    for candidate in read_packages(packages_path,repo_location):
        if candidate.ebuild_vars['pn']!=package_name:
            continue
        return candidate
    else:
        raise ValueError("Package not found")