forked from aboutcode-org/scancode-toolkit
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathphpcomposer.py
More file actions
363 lines (303 loc) · 12 KB
/
phpcomposer.py
File metadata and controls
363 lines (303 loc) · 12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import io
import json
from functools import partial
from packagedcode import models
"""
Parse PHP composer package manifests, see https://getcomposer.org/ and
https://packagist.org/
TODO: add support for the packagist format, which is fairly similar to the
composer.json and composer.lock formats handled here.
"""
class BasePhpComposerHandler(models.DatafileHandler):
    """
    Common base for the PHP composer datafile handlers: it assembles packages
    from the composer datafiles of a directory and assigns packages to
    resources.
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Yield the items produced by ``assemble_from_many_datafiles`` for the
        ``composer.json`` and ``composer.lock`` datafiles of the directory
        holding ``resource``. ``resource`` itself is used as the directory
        when it has no parent.
        """
        directory = resource.parent(codebase) if resource.has_parent() else resource

        yield from cls.assemble_from_many_datafiles(
            datafile_name_patterns=('composer.json', 'composer.lock'),
            directory=directory,
            codebase=codebase,
            package_adder=package_adder,
        )

    @classmethod
    def assign_package_to_resources(cls, package, resource, codebase, package_adder):
        """
        Delegate to ``DatafileHandler.assign_package_to_parent_tree`` and
        return its result.
        """
        base_handler = models.DatafileHandler
        return base_handler.assign_package_to_parent_tree(
            package, resource, codebase, package_adder)
class PhpComposerJsonHandler(BasePhpComposerHandler):
    """
    Handler for PHP composer ``composer.json`` manifests.
    """
    datasource_id = 'php_composer_json'
    path_patterns = ('*composer.json',)
    default_package_type = 'composer'
    default_primary_language = 'PHP'
    default_relation_license = 'OR'
    description = 'PHP composer manifest'
    documentation_url = 'https://getcomposer.org/doc/04-schema.md'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield a single package data object built from the JSON manifest file
        at ``location``.

        Note that this is NOT exactly the packagist.json format: the formats
        are closely related but have important (even if minor) differences.
        """
        with io.open(location, encoding='utf-8') as manifest:
            manifest_data = json.load(manifest)

        yield build_package_data(manifest_data, package_only)
def get_repository_homepage_url(namespace, name):
    """
    Return the packagist.org package page URL for ``namespace`` and ``name``,
    or None when there is no ``name``. The namespace segment is left out when
    ``namespace`` is empty.
    """
    if not name:
        return None

    package_path = f'{namespace}/{name}' if namespace else name
    return f'https://packagist.org/packages/{package_path}'
def get_api_data_url(namespace, name):
    """
    Return the packagist.org JSON data URL for ``namespace`` and ``name``, or
    None when there is no ``name``. The namespace segment is left out when
    ``namespace`` is empty.
    """
    if not name:
        return None

    package_path = f'{namespace}/{name}' if namespace else name
    return f'https://packagist.org/p/packages/{package_path}.json'
def build_package_data(package_data, package_only=False):
    """
    Return a package data object built from ``package_data``, a mapping of
    composer package attributes as found in a composer.json manifest or in a
    package entry of a composer.lock.

    ``package_only`` is passed through to ``PackageData.from_data``; when it
    is true the license fields are not populated.
    """
    # Name and description are required only for published packages, see
    # https://getcomposer.org/doc/04-schema.md#name
    # Unpublished packages are collected too: without a name, namespace and
    # name are None and the package is flagged as private.
    full_name = package_data.get('name')
    is_private = not full_name
    if is_private:
        ns = name = None
    else:
        ns, _, name = full_name.rpartition('/')

    handler = PhpComposerJsonHandler
    base_attributes = dict(
        datasource_id=handler.datasource_id,
        type=handler.default_package_type,
        namespace=ns,
        name=name,
        repository_homepage_url=get_repository_homepage_url(ns, name),
        api_data_url=get_api_data_url(ns, name),
        primary_language=handler.default_primary_language,
        is_private=is_private,
    )
    package = models.PackageData.from_data(base_attributes, package_only)

    # Top level composer items copied as-is: (composer key, package attribute)
    direct_fields = (
        ('version', 'version'),
        ('description', 'summary'),
        ('keywords', 'keywords'),
        ('homepage', 'homepage_url'),
    )
    for composer_key, attribute in direct_fields:
        item = package_data.get(composer_key)
        if isinstance(item, str):
            item = item.strip()
        if item:
            setattr(package, attribute, item)

    # Top level composer items handled by a function: (composer key, mapper).
    # Each mapper is called with the item value and the package to update.
    mapped_fields = (
        ('authors', author_mapper),
        ('license', partial(licensing_mapper, is_private=is_private)),
        ('support', support_mapper),
        ('require', partial(_deps_mapper, scope='require', is_runtime=True)),
        ('require-dev', partial(_deps_mapper, scope='require-dev', is_optional=True)),
        ('provide', partial(_deps_mapper, scope='provide', is_runtime=True)),
        ('conflict', partial(_deps_mapper, scope='conflict', is_runtime=True, is_optional=True)),
        ('replace', partial(_deps_mapper, scope='replace', is_runtime=True, is_optional=True)),
        ('suggest', partial(_deps_mapper, scope='suggest', is_runtime=True, is_optional=True)),
        ('source', source_mapper),
        ('dist', dist_mapper),
    )
    # NOTE(review): a mapper only runs for a non-empty value, so
    # licensing_mapper is not called here when the manifest has no license.
    for composer_key, mapper in mapped_fields:
        item = package_data.get(composer_key)
        if isinstance(item, str):
            item = item.strip()
        if item:
            mapper(item, package)

    # The vendor party is derived from the namespace part of the name
    vendor_mapper(package)

    # Per https://getcomposer.org/doc/04-schema.md#license this is an expression
    if not package_only:
        package.populate_license_fields()

    return package
class PhpComposerLockHandler(BasePhpComposerHandler):
    """
    Handler for PHP composer ``composer.lock`` lockfiles.
    """
    datasource_id = 'php_composer_lock'
    path_patterns = ('*composer.lock',)
    default_package_type = 'composer'
    default_primary_language = 'PHP'
    description = 'PHP composer lockfile'
    documentation_url = 'https://getcomposer.org/doc/01-basic-usage.md#commit-your-composer-lock-file-to-version-control'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield package data objects from the JSON lockfile at ``location``.

        The first object yielded carries only the dependencies: one pinned
        dependency per entry of the "packages" and "packages-dev" lists. It
        is followed by one object per locked package, "packages" entries
        first and "packages-dev" entries next.
        """
        with io.open(location, encoding='utf-8') as lockfile:
            lock_data = json.load(lockfile)

        def build_locked_packages(key):
            return [
                build_package_data(entry, package_only)
                for entry in lock_data.get(key, [])
            ]

        packages = build_locked_packages('packages')
        packages_dev = build_locked_packages('packages-dev')

        dependencies = [
            build_dep_package(locked, scope='require', is_runtime=True, is_optional=False)
            for locked in packages
        ]
        dependencies.extend(
            build_dep_package(locked, scope='require-dev', is_runtime=False, is_optional=True)
            for locked in packages_dev
        )

        lockfile_attributes = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            primary_language=cls.default_primary_language,
            dependencies=dependencies,
        )
        yield models.PackageData.from_data(lockfile_attributes, package_only)

        yield from packages
        yield from packages_dev
def licensing_mapper(licenses, package, is_private=False):
    """
    Set the extracted license statement of ``package`` and return ``package``.

    See https://getcomposer.org/doc/04-schema.md#license
    The licensing data structure has evolved over time and is a tad messy.
    The ``licenses`` value is either:
    - an SPDX expression string: { "license": "(LGPL-2.1 or GPL-3.0+)" }
    - a list of SPDX license id choices: "license": ["LGPL-2.1","GPL-3.0+"]
    Some older licenses are plain strings and not SPDX ids.

    ``licenses`` is stored unchanged, except when it is empty and the
    ``is_private`` flag is True: "proprietary-license" is stored instead.
    """
    use_proprietary = not licenses and is_private
    package.extracted_license_statement = (
        'proprietary-license' if use_proprietary else licenses
    )
    return package
def author_mapper(authors_content, package):
    """
    Append one person party to ``package.parties`` for each author parsed
    from ``authors_content`` and return ``package``. An author without a role
    gets the "author" role.
    https://getcomposer.org/doc/04-schema.md#authors
    """
    for name, role, email, url in parse_person(authors_content):
        party = models.Party(
            type=models.party_person,
            name=name,
            role=role or 'author',
            email=email,
            url=url,
        )
        package.parties.append(party)
    return package
def support_mapper(support, package):
    """
    Set the bug tracking and code view URLs of ``package`` from the "issues"
    and "source" items of the ``support`` mapping and return ``package``.
    A missing or empty item is stored as None.
    https://getcomposer.org/doc/04-schema.md#support
    """
    # TODO: the other support items are ignored for now
    issues_url = support.get('issues')
    source_url = support.get('source')
    package.bug_tracking_url = issues_url if issues_url else None
    package.code_view_url = source_url if source_url else None
    return package
def source_mapper(source, package):
    """
    Set ``package.vcs_url`` from the ``source`` mapping and return ``package``.
    A source item is typically only present in composer.lock.

    The URL is built as "<type>+<url>@<reference>" from the "type", "url" and
    "reference" items of ``source``. The "@<reference>" suffix is left out
    when there is no reference. ``package`` is returned unchanged when either
    the type or the url is missing or empty.
    """
    tool = source.get('type')
    url = source.get('url')
    if not (tool and url):
        return package

    vcs_url = f'{tool}+{url}'

    # Only pin to a revision when one is provided: formatting a missing
    # reference would otherwise yield a bogus "@None" suffix.
    reference = source.get('reference')
    if reference:
        vcs_url = f'{vcs_url}@{reference}'

    package.vcs_url = vcs_url
    return package
def dist_mapper(dist, package):
    """
    Set ``package.download_url`` from the "url" item of the ``dist`` mapping,
    if present, and return ``package``. A dist item is typically only present
    in composer.lock.
    """
    download_url = dist.get('url')
    if download_url:
        package.download_url = download_url
    return package
def vendor_mapper(package):
    """
    Append a "vendor" party named after the namespace of ``package`` to
    ``package.parties`` when there is a namespace, and return ``package``.
    The vendor is the first part of the composer name element.
    https://getcomposer.org/doc/04-schema.md#name
    """
    vendor = package.namespace
    if not vendor:
        return package

    vendor_party = models.Party(type=models.party_person, name=vendor, role='vendor')
    package.parties.append(vendor_party)
    return package
def _deps_mapper(deps, package, scope, is_runtime=False, is_optional=False):
    """
    Append one dependent package to ``package.dependencies`` for each
    "namespace/name": requirement item of the ``deps`` mapping and return
    ``package``. Each dependency is created with the provided ``scope``,
    ``is_runtime`` and ``is_optional`` values.
    https://getcomposer.org/doc/04-schema.md#package-links
    """
    for full_name, requirement in deps.items():
        # everything before the last "/" is the namespace
        namespace, _sep, name = full_name.rpartition('/')
        package_url = models.PackageURL(type='composer', namespace=namespace, name=name)
        package.dependencies.append(
            models.DependentPackage(
                purl=package_url.to_string(),
                extracted_requirement=requirement,
                scope=scope,
                is_runtime=is_runtime,
                is_optional=is_optional,
            )
        )
    return package
def parse_person(persons):
    """
    Yield a (name, role, email, url) tuple for each person object of the
    ``persons`` list. See https://getcomposer.org/doc/04-schema.md#authors

    The values come from the "name", "role", "email" and "homepage" fields of
    each person; a missing field is yielded as None. For example:

        "authors": [
            {
                "name": "Nils Adermann",
                "email": "naderman@naderman.de",
                "homepage": "http://www.naderman.de",
                "role": "Developer"
            },
            {
                "name": "Jordi Boggiano",
                "email": "j.boggiano@seld.be",
                "homepage": "http://seld.be",
                "role": "Developer"
            }
        ]

    Raise a ValueError if ``persons`` is not a list. As this is a generator,
    the error is raised when iteration starts.
    """
    if not isinstance(persons, list):
        raise ValueError(
            'Incorrect PHP composer persons: %(persons)r' % {'persons': persons})

    def stripped(value, chars=None):
        # leave None and empty values as they are
        return value and value.strip(chars)

    for person in persons:
        name = person.get('name')
        role = person.get('role')
        email = person.get('email')
        homepage = person.get('homepage')
        # FIXME: the email and url stripping got cargoculted from npm
        # package.json parsing
        yield (
            stripped(name),
            stripped(role),
            stripped(email, '<> '),
            stripped(homepage, '() '),
        )
def build_dep_package(package, scope, is_runtime, is_optional):
    """
    Return a pinned dependent package that points to the purl of ``package``,
    created with the provided ``scope``, ``is_runtime`` and ``is_optional``
    values.
    """
    dependency_attributes = dict(
        purl=package.purl,
        scope=scope,
        is_runtime=is_runtime,
        is_optional=is_optional,
        is_pinned=True,
    )
    return models.DependentPackage(**dependency_attributes)