Skip to content

Commit

Permalink
Parse and set namespace from distro ID correctly
Browse files Browse the repository at this point in the history
Update the debian, rpm and alpine package assembly to get the
distro identifier and then set it properly on the
created package, dependency and package_uid instances.

Reference: #3443
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed May 8, 2024
1 parent e0a985a commit c9b9175
Show file tree
Hide file tree
Showing 10 changed files with 2,963 additions and 2,763 deletions.
20 changes: 10 additions & 10 deletions src/packagedcode/alpine.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,17 @@ def parse(cls, location, package_only=False):

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
# get the root resource of the rootfs
levels_up = len('lib/apk/db/installed'.split('/'))
root_resource = get_ancestor(
levels_up=levels_up,
resource=resource,
codebase=codebase,
)
root_resource = cls.get_root_resource_for_rootfs(resource, codebase)

package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
package_uid = package.package_uid
namespace = cls.get_distro_identifier_rootfs(root_resource, codebase)
if namespace:
package.namespace = namespace

package_uid = package.refresh_and_get_package_uid()

cls.populate_license_fields(package)

Expand Down Expand Up @@ -119,12 +117,14 @@ def assemble(cls, package_data, resource, codebase, package_adder):

dependent_packages = package_data.dependencies
if dependent_packages:
yield from models.Dependency.from_dependent_packages(
for dep in models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=resource.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
)
):
dep.update_namespace(namespace)
yield dep


class AlpineApkbuildHandler(models.DatafileHandler):
Expand Down
50 changes: 23 additions & 27 deletions src/packagedcode/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,37 +246,33 @@ def parse(cls, location, package_only=False):

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
# get the root resource of the rootfs
levels_up = len('var/lib/dpkg/status'.split('/'))
root_resource = get_ancestor(
levels_up=levels_up,
resource=resource,
codebase=codebase,
)
root_resource = cls.get_root_resource_for_rootfs(resource, codebase)

package_name = package_data.name

package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
namespace = cls.get_distro_identifier_rootfs(root_resource, codebase)
if namespace:
package.namespace = namespace

package_uid = package.refresh_and_get_package_uid()

package_file_references = []
package_file_references.extend(package_data.file_references)
package_uid = package.package_uid

dependencies = []
dependent_packages = package_data.dependencies
if dependent_packages:
deps = list(
models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=resource.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
)
)
dependencies.extend(deps)
for dep in models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=resource.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
):
dep.update_namespace(namespace)
dependencies.append(dep)

# Multi-Arch can be: "foreign", "same", "allowed", "all", "optional" or
# empty/non-present. See https://wiki.debian.org/Multiarch/HOWTO
Expand Down Expand Up @@ -341,15 +337,15 @@ def assemble(cls, package_data, resource, codebase, package_adder):
# yield possible dependencies
dependent_packages = package_data.dependencies
if dependent_packages:
deps = list(
models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=res.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
)
)
dependencies.extend(deps)
for dep in models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=res.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
):
if namespace and not dep.namespace:
dep.namespace = namespace
dependencies.append(dep)

resources.append(res)

Expand Down
59 changes: 59 additions & 0 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
licensing = None

from packagedcode.licensing import get_declared_license_expression_spdx
from packagedcode.utils import get_ancestor

"""
This module contain data models for package and dependencies, abstracting and
Expand Down Expand Up @@ -419,6 +420,24 @@ def __attrs_post_init__(self, *args, **kwargs):
if not self.dependency_uid:
self.dependency_uid = build_package_uid(self.purl)

def refresh_dependency_uid(self):
    """
    Recompute ``dependency_uid`` from the current ``purl`` of this
    dependency. Use this after the purl has been modified.
    """
    current_purl = self.purl
    self.dependency_uid = build_package_uid(current_purl)

def update_namespace(self, namespace):
    """
    Replace the namespace of this dependency ``purl`` with ``namespace`` and
    refresh ``dependency_uid`` to match the updated purl.

    Do nothing if ``namespace`` is empty or if this dependency has no purl
    to update.
    """
    if not namespace or not self.purl:
        return

    purl = PackageURL.from_string(self.purl)
    # Carry over every other purl component, including the subpath, so that
    # only the namespace changes.
    new_purl = PackageURL(
        type=purl.type,
        namespace=namespace,
        name=purl.name,
        version=purl.version,
        qualifiers=purl.qualifiers,
        subpath=purl.subpath,
    )
    self.purl = new_purl.to_string()
    self.refresh_dependency_uid()

@classmethod
def from_dependent_package(
cls,
Expand Down Expand Up @@ -1461,6 +1480,42 @@ def get_top_level_resources(cls, manifest_resource, codebase):
"""
pass

@classmethod
def get_root_resource_for_rootfs(cls, resource, codebase):
    """
    Return the ancestor of ``resource`` in ``codebase`` that is meant to be
    the root of the rootfs. The number of levels to walk up is the number of
    path segments in the first of this handler ``path_patterns``.
    """
    # the 1st pattern is used as the reference, for instance:
    # '*usr/lib/sysimage/rpm/Packages.db'
    reference_pattern = cls.path_patterns[0]

    # each '/'-separated segment is one level to climb to reach the root
    segments = reference_pattern.split('/')

    return get_ancestor(
        levels_up=len(segments),
        resource=resource,
        codebase=codebase,
    )

@classmethod
def get_distro_identifier_rootfs(cls, root_resource, codebase):
    """
    Return a distro identifier string for the rootfs rooted at
    ``root_resource`` in ``codebase``, or None if it cannot be determined.

    The identifier is the "name" of the first package data found in the
    ``etc/os-release`` resource, falling back to ``usr/lib/os-release``
    when the former is missing or does not provide a name.
    """
    # NOTE(review): the root resource is presumably missing when the
    # datafile is not nested deeply enough in the codebase; there is
    # nothing to look up in that case.
    if root_resource is None:
        return None

    root_path = root_resource.path
    os_release_rootfs_paths = ('etc/os-release', 'usr/lib/os-release',)
    for os_release_rootfs_path in os_release_rootfs_paths:
        os_release_path = '/'.join([root_path, os_release_rootfs_path])
        os_release_res = codebase.get_resource(os_release_path)
        if not os_release_res:
            continue

        # there can be only one distro
        distro = os_release_res.package_data and os_release_res.package_data[0]
        if not distro:
            continue

        identifier = distro.get("name")
        if identifier:
            return identifier
        # no usable name here: try the next os-release location

    return None


class NonAssemblableDatafileHandler(DatafileHandler):
"""
Expand Down Expand Up @@ -1535,6 +1590,10 @@ def __attrs_post_init__(self, *args, **kwargs):
if not self.package_uid:
self.package_uid = build_package_uid(self.purl)

def refresh_and_get_package_uid(self):
    """
    Recompute ``package_uid`` from the current ``purl`` of this package and
    return the updated value. Use this after the purl fields have changed.
    """
    rebuilt_uid = build_package_uid(self.purl)
    self.package_uid = rebuilt_uid
    return self.package_uid

def to_dict(self):
return super().to_dict(with_details=False)

Expand Down
57 changes: 15 additions & 42 deletions src/packagedcode/rpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from packagedcode.rpm_installed import collect_installed_rpmdb_xmlish_from_rpmdb_loc
from packagedcode.rpm_installed import parse_rpm_xmlish
from packagedcode.utils import build_description
from packagedcode.utils import get_ancestor

from scancode.api import get_licenses

TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)
Expand Down Expand Up @@ -143,54 +143,29 @@ def parse(cls, location, package_only=False):
package_type=cls.default_package_type,
package_only=package_only,
)
# TODO: package_data.namespace = cls.default_package_namespace
return package_data

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
# get the root resource of the rootfs
# take the 1st pattern as a reference
# for instance: '*usr/lib/sysimage/rpm/Packages.db'
base_path_patterns = cls.path_patterns[0]

# how many levels up are there to the root of the rootfs?
levels_up = len(base_path_patterns.split('/'))

root_resource = get_ancestor(
levels_up=levels_up,
resource=resource,
codebase=codebase,
)

root_resource = cls.get_root_resource_for_rootfs(resource, codebase)

package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
package_uid = package.package_uid

root_path = root_resource.path
# get etc/os-release for namespace
namespace = None
os_release_rootfs_paths = ('etc/os-release', 'usr/lib/os-release',)
for os_release_rootfs_path in os_release_rootfs_paths:
os_release_path = '/'.join([root_path, os_release_rootfs_path])
os_release_res = codebase.get_resource(os_release_path)
if not os_release_res:
continue
# there can be only one distro
distro = os_release_res.package_data and os_release_res.package_data[0]
if distro:
namespace = distro.namespace
break
namespace = cls.get_distro_identifier_rootfs(root_resource, codebase)
if namespace:
package.namespace = namespace

package.namespace = namespace
package_uid = package.refresh_and_get_package_uid()

# tag files from refs
resources = []
missing_file_references = []
# a file ref extends from the root of the filesystem
for ref in package.file_references:
ref_path = '/'.join([root_path, ref.path])
ref_path = '/'.join([root_resource.path, ref.path])
res = codebase.get_resource(ref_path)
if not res:
missing_file_references.append(ref)
Expand All @@ -216,8 +191,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
datasource_id=package_data.datasource_id,
package_uid=package_uid,
):
if not dep.namespace:
dep.namespace = namespace
dep.update_namespace(namespace)
yield dep

for resource in resources:
Expand Down Expand Up @@ -424,19 +398,18 @@ def parse(cls, location, package_only=False):
@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):

levels_up = len('var/lib/rpmmanifest/container-manifest-2'.split('/'))
root_resource = get_ancestor(
levels_up=levels_up,
resource=resource,
codebase=codebase,
)
root_resource = cls.get_root_resource_for_rootfs(resource, codebase)
package_name = package_data.name

package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
package_uid = package.package_uid
namespace = cls.get_distro_identifier_rootfs(root_resource, codebase)
if namespace:
package.namespace = namespace

package_uid = package.refresh_and_get_package_uid()

assemblable_paths = tuple(set([
f'*usr/share/licenses/{package_name}/COPYING*',
Expand Down

0 comments on commit c9b9175

Please sign in to comment.