Skip to content

update-copyright.sh

Martijn Dekker edited this page Jul 30, 2022 · 1 revision

update-copyright.sh

This script helps maintain proper copyright attribution in the ksh 93u+m code base. It:

  1. Updates the COPYRIGHT file with all the contributors to ksh 93u+m since the reboot, based on the commit history. The authors are sorted by number of commits in descending order and inserted under the first CONTRIBUTORS header in that file.
  2. Updates the ksh 93u+m notices of the form "(c) YEAR-YEAR Contributors to ksh 93u+m" in the copyright headers and --about messages. The second year is updated to the current year.
  3. Adds any missing contributors to each file's copyright header, based on that file's commit history.

The script does not stage or commit the changes it makes. Steps 2 and 3 are only performed for files that have changed since the start of the current year, including files that have uncommitted or unstaged changes.

# "Safe mode": no pathname expansion (noglob), no overwriting of existing
# files by a plain '>' redirection (noclobber), expanding an unset variable
# is an error (nounset), and an empty IFS switches off field splitting.
set -o noglob -o noclobber -o nounset
IFS=
# Search /opt/ast/bin first, so that path-bound built-ins are used.
PATH=/opt/ast/bin:${PATH}

# Report a fatal error: write "<$0>: <message>" to standard error and
# terminate with exit status 1.
error_out()
{
	{
		echo "$0: $@"
	} >&2
	exit 1
}
# Version gate: evaluate the arithmetic test ".sh.version >= 20220728" with
# 'let', with PATH set to /dev/null for this one command and any error output
# discarded. If that does not succeed, stop with the "requires ksh 93u+m/1.0"
# error.
# NOTE(review): PATH=/dev/null presumably keeps a shell that lacks a 'let'
# built-in from finding an external command of that name -- confirm.
PATH=/dev/null let ".sh.version >= 20220728" 2>/dev/null || error_out 'requires ksh 93u+m/1.0'

# Map the account handles that GitHub substituted for some commit authors back
# to their real names. (The names were fine in the original commits; merging
# or squashing changed them.)
# $1 is the NAME of the variable holding the author name; it is corrected in
# place and left alone if it is not one of the known handles.
function fix_authorname
{
	nameref who=$1
	if [[ $who == lev105 ]]; then
		who='Lev Kujawski'
	elif [[ $who == pghvlaans ]]; then
		who='K. Eugene Carlson'
	elif [[ $who == sterlingjensen ]]; then
		who='Sterling Jensen'
	elif [[ $who == vmihalko ]]; then
		who='Vincent Mihalkovic'
	fi
}

# Copy the freshly generated $tmpfile over the file named by $1, but only when
# $1 is missing, empty, or its contents differ from $tmpfile. Each update is
# announced on standard output. A failing copy ends the script; any cmp status
# other than 0 or 1 is reported through error_out.
function update_file
{
	typeset target=$1 rc
	if [[ -s $target ]]; then
		cmp -s "$target" "$tmpfile"
		rc=$?
	else
		# nothing worth comparing: treat as "differs"
		rc=1
	fi
	if ((rc == 0)); then
		return 0
	elif ((rc == 1)); then
		print -r "updating $target"
		cat "$tmpfile" >|"$target" || exit
	else
		error_out "error in cmp"
	fi
}

# Centre an author line in a field of 70 characters.
# $1 is the NAME of the variable holding the line; it is padded in place with
# spaces on both sides. If an odd number of spaces is needed, the extra one
# goes on the right. A line longer than 70 characters is left unchanged and a
# warning is written to standard error. Always returns 0.
function centre_line
{
	nameref l=$1
	# warn about over-long lines (this previously called the non-existent
	# command 'prinf', so the warning was never actually printed)
	((${#l} > 70)) && printf '%q: WARNING: author line too long: %q\n' "$0" "$l" >&2
	# add one space on each side until at most one character short
	while ((${#l} < 69)); do
		l=" $l "
	done
	# odd-length input ends up at 69: the last space goes on the right
	((${#l}==69)) && l+=' '
	# do not leak the status of the length test above to the caller
	return 0
}

# Pre-flight checks. The script must be started in a directory that has a
# .git subdirectory, and needs three validated facts: the current four-digit
# year, the hash of the last commit that git log lists for this year, and the
# hash of the most recent commit.
if ! [[ -d .git ]]; then
	error_out "run this from the ksh 93u+m git repo's main directory"
fi

current_year=$(date +%Y)
if ! [[ $current_year =~ ^[0-9]{4}$ ]]; then
	error_out "could not get current year"
fi

# the final line of the log output since 1 January (UTC) of the current year
first_commit=$(git log --since="$current_year-01-01 00:00 UTC" --pretty=format:'%H' | tail -n1)
if ! [[ $first_commit =~ ^[0-9a-f]{40}$ ]]; then
	error_out "could not get first commit"
fi

current_commit=$(git log -1 --pretty=format:'%H')
if ! [[ $current_commit =~ ^[0-9a-f]{40}$ ]]; then
	error_out "could not get current commit"
fi

# get author/email associations from log, overriding a few
# 'email' maps an author's (corrected) name to an e-mail address. Each log
# line is "<address> <name>"; when a name occurs more than once, the line read
# later overwrites the earlier entry.
typeset -A email
# NOTE(review): the entries stored by this loop are used further down, so the
# final 'while' of the pipeline has to run in the current shell environment
# (as ksh does) rather than in a subshell -- keep in mind if porting.
git log reboot..dev '--pretty=tformat:%ae %an' | while IFS=' ' read e n; do
	fix_authorname n
	email[$n]=$e
done
# NOTE(review): the four lines below look damaged by the e-mail obfuscation of
# the web page this text was copied from: "[email protected]" stands where
# there was presumably "=<address>". As written they are not assignments.
# Restore the real addresses from the upstream script before running this.
email['Anuradha Weeraman'][email protected]
email['Lev Kujawski'][email protected]
email['K. Eugene Carlson'][email protected]
email['Vincent Mihalkovic'][email protected]

# Scratch file in which each candidate replacement is built before it is
# compared with, and possibly copied over, the real file.
# mktemp creates the file under an unpredictable name; a guessable path that
# is later force-written with '>|' could be pre-created by someone else.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/update-copyright-years.XXXXXX") \
	|| error_out "could not create temporary file"
# Remove the scratch file on exit. rm is run as an ordinary command rather
# than via 'exec': exec would replace the shell, so rm's exit status (normally
# 0) would become the script's exit status and hide the failure that caused
# the exit.
trap 'rm -f "$tmpfile"' EXIT

# update COPYRIGHT file
# First build the new contributor list: one centred "#...Name <email>...#" line
# per author, ordered by number of commits, highest first. Author names are
# corrected before they are counted.
# The list reaches awk through the environment, because awk cannot portably
# be handed strings containing newlines via -v options.
contributors=$(
	git log reboot..dev '--pretty=tformat:%an' \
	| while read author
	do
		fix_authorname author
		print -r $author
	done \
	| sort \
	| uniq -c \
	| sort -rn \
	| while IFS=' ' read commit_count author
	do
		entry="$author <${email[$author]}>"
		centre_line entry
		print -r "#${entry}#"
	done
)
export contributors
# Splice the list into COPYRIGHT, writing the result to the scratch file.
awk '
	state == 0 {	# before the list: copy up to and including the first CONTRIBUTORS line
		print
		if ($0 ~ /CONTRIBUTORS/) state = 1
		next
	}
	state == 1 {	# the line after that header: copy it and remember it as the spacing line
		print
		spacingline = $0
		state = 2
		next
	}
	state == 2 {	# this line is replaced by the complete new list
		print ENVIRON["contributors"]
		state = 3
		next
	}
	state == 3 {	# drop the rest of the old list, up to the next spacing line
		if ($0 == spacingline) {
			print
			state = 4
		}
		next
	}
	{ print }	# after the list: copy everything else unchanged
' COPYRIGHT >|$tmpfile
unset contributors
update_file COPYRIGHT

# update headers in source files
# Candidates: every file with unstaged changes, staged changes, or changes
# committed after $first_commit. A file can qualify on more than one count, so
# the list is de-duplicated to avoid processing the same file repeatedly.
# NOTE(review): the range $first_commit..HEAD leaves out the changes (and the
# author) of $first_commit itself, i.e. of the year's first commit -- check
# whether that is intended.
(git diff --name-only; git diff --name-only --cached; git diff-tree --name-only -r $first_commit..HEAD) \
| sort -u \
| while read -r file
do
	[[ -f $file ]] || continue
	# only handle files whose first line is the top border of a header
	# comment box, either the C-style or the '#'-style one
	read -r n <$file
	[[ $n == '/***********************************************************************' \
	|| $n == '########################################################################' ]] \
		|| continue
	sedscript=''
	# exclude version.h from adding author names -- most changes to it are just version increments
	if [[ $file != src/cmd/ksh93/include/version.h ]]
	then
		# comment character: the third character of the first line ('*' or '#')
		cc=${n:2:1}
		# find the line number of the last author line (after which to insert new authors)
		# (the regex is passed through the environment: awk -v would process
		# backslash escapes in the value, so '\.' would not arrive intact)
		n="^[$cc] .*<.*@.*\..*>.* [$cc]$"
		alineno=$(ere=$n awk '{ if($0 ~ ENVIRON["ere"]) found++; else if(found) { print NR-1; exit; } }' "$file")
		# no author lines found: skip this file altogether
		((alineno > 0)) || continue
		# get authors, most frequent contributor first
		# (names are corrected BEFORE counting, so that somebody who appears
		# under both an account handle and a real name yields one entry
		# instead of two identical inserted lines)
		git log $first_commit..HEAD --pretty=tformat:%an "$file" \
		| while read -r name
		do
			fix_authorname name
			print -r -- "$name"
		done \
		| sort | uniq -c | sort -rn \
		| while IFS=' ' read -r throwaway_number name
		do
			aline="$name <${email[$name]}>"
			centre_line aline
			# if not already in file, add to sed script
			# (whole-line, fixed-string match: characters such as '.' in
			# the author line must not act as regex operators)
			if ! grep -qxF -- "${cc}${aline}${cc}" "$file"; then
				# the first addition opens an "<alineno> { ... }" group;
				# each addition is one 'a\' (append) command
				sedscript=${sedscript:-"$alineno { "}$'a\\\n'${cc}${aline}${cc}$'\n'
			fi
		done
		# close the group if anything was added
		[[ -n $sedscript ]] && sedscript+=$'}\n'
	fi
	# add sed command to update copyright line
	sedscript+="/(c) [0-9]\{4\}-[0-9]\{4\} Contributors to ksh 93u+m/ s/-[0-9]\{4\}/-$current_year/"
	# update the file if the sed script makes a difference
	sed "$sedscript" $file >|$tmpfile || exit
	update_file $file
done
Clone this wiki locally