#566 - Work in progress to support multiple about_resource

Signed-off-by: Chin Yeung Li <[email protected]>
aboutcode-org · Aug 7, 2024 · 6eed786 · 6eed786
1 parent f4c4db4
commit 6eed786
Show file tree

Hide file tree

Showing 8 changed files with 157 additions and 74 deletions.
diff --git a/src/attributecode/gen.py b/src/attributecode/gen.py
@@ -94,12 +94,12 @@ def check_newline_in_file_field(component):
  if k in file_fields:
  try:
  if '\n' in component[k]:
- if k == u'about_resource':
- msg = (
- "Multiple lines detected in 'about_resource' for '%s' which is not supported.") % component['about_resource']
- else:
-  msg = ("New line character detected in '%s' for '%s' which is not supported."
-  "\nPlease use ',' to declare multiple files.") % (k, component['about_resource'])
+ # if k == u'about_resource':
+ # msg = (
+ # "Multiple lines detected in 'about_resource' for '%s' which is not supported.") % component['about_resource']
+ # else:
+ msg = ("New line character detected in '%s' for '%s' which is not supported."
+ "\nPlease use ',' to declare multiple files.") % (k, component['about_resource'])
  errors.append(Error(CRITICAL, msg))
  except:
  pass
@@ -123,9 +123,6 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
  Load the inventory file at `location` for ABOUT and LICENSE files stored in
  the `base_dir`. Return a list of errors and a list of About objects
  validated against the `base_dir`.
-
- Optionally use `reference_dir` as the directory location of extra reference
- license and notice files to reuse.
  """
  errors = []
  abouts = []
@@ -164,21 +161,37 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
  for component in stripped_inv:
  if not from_attrib:
  if 'about_resource' in component:
- arp = component['about_resource']
- dup_err = check_duplicated_about_resource(arp, arp_list)
- if dup_err:
- if not dup_err in errors:
- errors.append(dup_err)
- else:
- arp_list.append(arp)
-
- invalid_about_filename = check_about_resource_filename(arp)
- if invalid_about_filename and not invalid_about_filename in errors:
- errors.append(invalid_about_filename)
+ if isinstance(component['about_resource'], str):
+ arp = component['about_resource']
+ dup_err = check_duplicated_about_resource(arp, arp_list)
+ if dup_err:
+ if dup_err not in errors:
+ errors.append(dup_err)
+ else:
+ arp_list.append(arp)
 
+ invalid_about_filename = check_about_resource_filename(arp)
+ if invalid_about_filename and invalid_about_filename not in errors:
+ errors.append(invalid_about_filename)
+ else:
+ for arp in component['about_resource']:
+ dup_err = check_duplicated_about_resource(
+ arp, arp_list)
+ if dup_err:
+ if dup_err not in errors:
+ errors.append(dup_err)
+ else:
+ arp_list.append(arp)
+
+ invalid_about_filename = check_about_resource_filename(
+ arp)
+ if invalid_about_filename and invalid_about_filename not in errors:
+ errors.append(invalid_about_filename)
+ """
  newline_in_file_err = check_newline_in_file_field(component)
  if newline_in_file_err:
  errors.extend(newline_in_file_err)
+ """
 
  if errors:
  return errors, abouts
@@ -197,50 +210,27 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
  )
  errors.append(Error(CRITICAL, msg))
  return errors, abouts
+
  # Set about file path to '' if no 'about_resource' is provided from
  # the input
  if 'about_resource' not in fields:
  afp = ''
+ about, custom_fields_list, process_errors = process_inventory(afp, fields,
+ from_attrib, base_dir, scancode, reference_dir)
+ abouts.append(about)
  else:
- afp = fields.get(model.About.ABOUT_RESOURCE_ATTR)
-
- afp = util.to_posix(afp)
- if base_dir:
- loc = join(base_dir, afp)
- else:
- loc = afp
- about = model.About(about_file_path=afp)
- about.location = loc
-
- # Update value for 'about_resource'
- # keep only the filename or '.' if it's a directory
- if 'about_resource' in fields:
- updated_resource_value = u''
- resource_path = fields['about_resource']
- if resource_path.endswith(u'/'):
- updated_resource_value = u'.'
- else:
- updated_resource_value = basename(resource_path)
- fields['about_resource'] = updated_resource_value
-
- ld_errors = about.load_dict(
- fields,
- base_dir,
- scancode=scancode,
- from_attrib=from_attrib,
- running_inventory=False,
- reference_dir=reference_dir,
- )
-
- for severity, message in ld_errors:
- if 'Custom Field' in message:
- field_name = message.replace('Custom Field: ', '').strip()
- if not field_name in custom_fields_list:
- custom_fields_list.append(field_name)
+ if scancode:
+ afp_list = [fields.get(model.About.ABOUT_RESOURCE_ATTR)]
  else:
- errors.append(Error(severity, message))
+ afp_list = fields.get(model.About.ABOUT_RESOURCE_ATTR)
+ for afp in afp_list:
+ about, custom_fields_list, process_errors = process_inventory(afp, fields,
+ from_attrib, base_dir, scancode, reference_dir)
+ abouts.append(about)
+
+ for err in process_errors:
+ errors.append(err)
 
- abouts.append(about)
  if custom_fields_list:
  custom_fields_err_msg = 'Field ' + \
  str(custom_fields_list) + ' is a custom field.'
@@ -249,6 +239,66 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
  return errors, abouts
 
 
+def process_inventory(about_file_path, fields, from_attrib, base_dir, scancode, reference_dir):
+ """
+ Return an About object, a list of custom fields and a list of errors. The
+ About object is validated against the `base_dir`.
+
+ Optionally use `reference_dir` as the directory location of extra reference
+ license and notice files to reuse.
+ """
+ custom_fields_list = []
+ errors = []
+ afp = util.to_posix(about_file_path)
+ if base_dir:
+ loc = join(base_dir, afp)
+ else:
+ loc = afp
+ about = model.About(about_file_path=afp)
+ about.location = loc
+
+ """
+ # Update value for 'about_resource'
+ # keep only the filename or '.' if it's a directory
+ if 'about_resource' in fields:
+ updated_resource_list = []
+ resource_path_list = fields['about_resource']
+ for resource_path in resource_path_list:
+ if resource_path.endswith(u'/'):
+ updated_resource_list.append('.')
+ else:
+ updated_resource_list.append(basename(resource_path))
+ fields['about_resource'] = updated_resource_list
+ """
+ if 'about_resource' in fields:
+ updated_resource_value = u''
+ resource_path = about.about_file_path
+ if resource_path.endswith(u'/'):
+ updated_resource_value = u'.'
+ else:
+ updated_resource_value = basename(resource_path)
+ fields['about_resource'] = updated_resource_value
+
+ ld_errors = about.load_dict(
+ fields,
+ base_dir,
+ scancode=scancode,
+ from_attrib=from_attrib,
+ running_inventory=False,
+ reference_dir=reference_dir,
+ )
+
+ for severity, message in ld_errors:
+ if 'Custom Field' in message:
+ field_name = message.replace('Custom Field: ', '').strip()
+ if field_name not in custom_fields_list:
+ custom_fields_list.append(field_name)
+ else:
+ errors.append(Error(severity, message))
+
+ return about, custom_fields_list, errors
+
+
 def update_about_resource(self):
  pass
 
@@ -283,6 +333,7 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license
  scancode=scancode,
  worksheet=worksheet
  )
+
  if gen_license:
  license_dict, err = model.pre_process_and_fetch_license_dict(
  abouts, api_url=api_url, api_key=api_key)
@@ -297,7 +348,7 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license
  about.about_file_path = about.about_file_path.strip()
  if about.about_file_path.startswith('/'):
  about.about_file_path = about.about_file_path.lstrip('/')
- # Use the name as the ABOUT file name if about_resource is empty
+ # Use the name as the ABOUT file name if about_file_path field is empty
  if not about.about_file_path:
  about.about_file_path = about.name.value
  dump_loc = join(bdir, about.about_file_path.lstrip('/'))
@@ -319,7 +370,6 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license
  continue
 
  try:
-
  licenses_dict = {}
  if gen_license:
  # Write generated LICENSE file
@@ -344,9 +394,7 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license
  about.license_url.present = True
  if about.spdx_license_key.value:
  about.spdx_license_key.present = True
-
  about.dump(dump_loc, licenses_dict)
-
  if android:
  """
  Create MODULE_LICENSE_XXX and get context to create NOTICE file

diff --git a/src/attributecode/model.py b/src/attributecode/model.py
@@ -1830,14 +1830,17 @@ def about_object_to_list_of_dictionary(abouts):
  # from the output location
  if 'about_resource' in ad.keys():
  about_resource = ad['about_resource']
+ about_resource_dict = {}
  for resource in about_resource:
  updated_about_resource = posixpath.normpath(
  posixpath.join(afp_parent, resource))
  if resource == u'.':
  if not updated_about_resource == '/':
  updated_about_resource = updated_about_resource + '/'
- ad['about_resource'] = dict(
- [(updated_about_resource, None)])
+ about_resource_dict[updated_about_resource] = None
+ # about_resource_list.append(updated_about_resource)
+ # ad['about_resource'] = dict([(updated_about_resource, None)])
+ ad['about_resource'] = about_resource_dict
  del ad['about_file_path']
  serialized.append(ad)
  return serialized

diff --git a/src/attributecode/util.py b/src/attributecode/util.py
@@ -309,9 +309,13 @@ def load_csv(location):
  with open(location, mode='r', encoding='utf-8-sig',
  errors='replace') as csvfile:
  for row in csv.DictReader(csvfile):
- # convert all the column keys to lower case
- updated_row = {key.lower().strip(): value for key,
- value in row.items()}
+ updated_row = {}
+ for key, value in row.items():
+ formatted_key = key.lower().strip()
+ if formatted_key in file_fields:
+ updated_row[formatted_key] = value.splitlines()
+ else:
+ updated_row[formatted_key] = value
  results.append(updated_row)
  return results
 
@@ -545,8 +549,10 @@ def ungroup_licenses(licenses):
  return lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score, lic_matched_text
 
 
-# FIXME: add docstring
 def format_about_dict_output(about_dictionary_list):
+ """
+ Format the dictionary list to be able to write to a CSV output
+ """
  formatted_list = []
  for element in about_dictionary_list:
  row_list = dict()
@@ -562,8 +568,10 @@ def format_about_dict_output(about_dictionary_list):
  return formatted_list
 
 
-# FIXME: add docstring
 def format_about_dict_for_json_output(about_dictionary_list):
+ """
+ Format the dictionary list to be able to write to a JSON output
+ """
  licenses = ['license_key', 'license_name', 'license_file', 'license_url']
  json_formatted_list = []
  for element in about_dictionary_list:
@@ -812,7 +820,10 @@ def strip_inventory_value(inventory):
  for component in inventory:
  comp_dict = {}
  for key in component:
- comp_dict[key] = str(component[key]).strip()
+ if isinstance(component[key], str):
+ comp_dict[key] = component[key].strip()
+ else:
+ comp_dict[key] = component[key]
  stripped_inventory.append(comp_dict)
  return stripped_inventory
 

diff --git a/tests/test_attrib.py b/tests/test_attrib.py
@@ -245,8 +245,6 @@ def test_scancode_input_dup_lic_match(self):
  test_file = get_test_loc(
  'test_attrib/scancode_input/sc-dup-lic-match.json')
  errors, abouts = gen.load_inventory(test_file, scancode=True)
- print("############################")
- print(errors)
  # Check if there is error's level > INFO
  result = [(level, e) for level, e in errors if level > INFO]
  assert result == []
@@ -272,6 +270,10 @@ def test_scancode_input_dup_lic_match(self):
  # expected doesn't work well, it works after removed all the newline and spaces
  # assert expected == result
  # assert expected.splitlines(False) == result.splitlines(False)
+ with open("C:\\Users\\thoma\\Desktop\\tmp\\AbcTK\\566\\about\\result.html", 'w') as result_file:
+ result_file.write(result)
+ with open("C:\\Users\\thoma\\Desktop\\tmp\\AbcTK\\566\\about\\expected.html", 'w') as expected_file:
+ expected_file.write(expected)
  assert expected.replace('\n', '').replace(' ', '').replace(
  '\t', '') == result.replace('\n', '').replace(' ', '').replace('\t', '')
 

diff --git a/tests/test_model.py b/tests/test_model.py
@@ -1053,6 +1053,18 @@ def test_write_output_csv_with_multiple_files(self):
  expected = get_test_loc('test_model/multiple_files_expected.csv')
  check_csv(expected, result)
 
+ def test_write_output_csv_with_multiple_about_resource(self):
+ path = 'test_model/multiple_about_resource.ABOUT'
+ test_file = get_test_loc(path)
+ abouts = model.About(location=test_file, about_file_path=path)
+
+ result = get_temp_file()
+ model.write_output([abouts], result, format='csv')
+
+ expected = get_test_loc(
+ 'test_model/multiple_about_resource_expected.csv')
+ check_csv(expected, result)
+
  def test_write_output_json(self):
  path = 'test_model/this.ABOUT'
  test_file = get_test_loc(path)

diff --git a/tests/testdata/test_model/expected.json b/tests/testdata/test_model/expected.json
@@ -1,7 +1,7 @@
 [
  {
- "about_resource": "/test_model/", 
- "name": "AboutCode", 
+ "about_resource": "/test_model/",
+ "name": "AboutCode",
  "version": "0.11.0"
  }
-]
+]
diff --git a/tests/testdata/test_model/multiple_about_resource.ABOUT b/tests/testdata/test_model/multiple_about_resource.ABOUT
@@ -0,0 +1,4 @@
+about_resource:
+ - .
+ - multiple_files_expected.csv
+name: multiple_about_resource
diff --git a/tests/testdata/test_model/multiple_about_resource_expected.csv b/tests/testdata/test_model/multiple_about_resource_expected.csv
@@ -0,0 +1,3 @@
+about_resource,name
+"/test_model/
+/test_model/multiple_files_expected.csv",multiple_about_resource