Skip to content

Commit

Permalink
move tag_test from template to core
Browse files Browse the repository at this point in the history
  • Loading branch information
flammie committed Sep 5, 2024
1 parent 4396a04 commit 8f04acf
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
/scripts/accept-all-lemmas.sh
/scripts/run-morph-tester.sh
/scripts/run-yaml-testcases.sh
/scripts/tag_test.sh
Makefile
Makefile.in
__pycache__/
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ nobase_dist_pkgdata_SCRIPTS = \
scripts/run_voikko_speller.sh \
scripts/scripted-memusg.sh \
scripts/spell-preprocess.pl \
scripts/tag_test.sh \
scripts/flaglist2make_flag_regex.sh \
scripts/taglist2insert_semantic_tags_after_pos_regex.sh \
scripts/taglist2make_CG_tags_regex.sh \
Expand Down
4 changes: 3 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
AC_INIT([giella-core], [1.0.3], [[email protected]], [giella-core], [https://github.com/giellalt/giella-core])
AC_INIT([giella-core], [1.0.4], [[email protected]], [giella-core], [https://github.com/giellalt/giella-core])
AC_REVISION([$Revision$])
AC_CONFIG_AUX_DIR([build-aux])
AM_INIT_AUTOMAKE([1.9 tar-pax -Wall -Werror foreign])
Expand Down Expand Up @@ -96,6 +96,8 @@ AC_CONFIG_FILES([scripts/generate-lemmas.sh],
[chmod +x scripts/generate-lemmas.sh])
AC_CONFIG_FILES([scripts/accept-all-lemmas.sh],
[chmod +x scripts/accept-all-lemmas.sh])
AC_CONFIG_FILES([scripts/tag_test.sh],
[chmod +x scripts/tag_test.sh])

AC_OUTPUT

Expand Down
14 changes: 7 additions & 7 deletions scripts/extract-used-tags.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,23 @@
# It is not intended to be used on its own.

cut -d'!' -f1 | # get rid of comments
grep ';' | # Get only lines with
grep ';' | # Get only lines with
grep -v -E '^[0-9A-Za-z_-]+\s+;' | # get rid of entries with only contlex
sed 's/\"[^"]*\"//' | # get rid of quoted info strings
sed 's/"[^"]*"//g' | # get rid of quoted info strings
tr '\t' ' ' | # translate all tabs to spaces
tr -s ' ' | # squeeze all spaces
sed 's/ [0-9A-Za-z/#_-]* ;//' | # get rid of all contlexes
cut -d':' -f1 | # get rid of the surface side
egrep '([@+][0-9A-Za-z][[0-9A-Za-z]@+])' | # Only keep interesting strings
grep -E '[@+][0-9A-Za-z][0-9A-Za-z@+]' | # Only keep interesting strings
sed 's/@@/@€@/g' | # insert newline placeholder between flag diacritics
tr '€#"' '\n' | # insert newlines now, for cleaner data for the next steps
sed '/^[^@+]/ s/^[^@+]*//' | # remove non-tag text at beginning of line
sed 's/^\(@[^@]*@\)/\1€/' | # insert newline placeholer after initial flag diacritic
tr '' '\n' | # insert newline for cleaner data for next step
egrep '[@+].' | # Only keep intersting stuff
grep -E '[@+].' | # Only keep intersting stuff
sed '/^\+/ s/\+/€+/g' | # if begins with +, insert newlines before + (suffix tags)
sed '/\+$/ s/\+/\+€/g' | # if ends with +, insert newlines after + (prefix tags)
tr '' '\n' | # insert newlines
grep '[\+@]' | # keep only interesting stuff
sed 's/\-$//'
# Get rid of final hyphens, they are bogus
grep '[\+@].' | # keep only interesting stuff
sed 's/[-%]*$//'
# Get rid of final stuff that is odd
42 changes: 42 additions & 0 deletions scripts/tag_test.sh.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
#
# Check if there are tags which are not declared in root.lexc or if
# tags are misspelled.
#
# Usage: tag_test.sh lexicon.lexc
#
# The lines of the combined lexicon before the first "LEXICON Root" line are
# scanned for DEFINED tags, the lines after it for USED tags. Every used tag
# that is not also defined is reported.
#
# Environment:
#   GIELLA_CORE  directory whose scripts/ subdirectory provides
#                extract-used-tags.sh and extract-defined-tags.sh
#
# Exit with 0 if and only if all tests pass.
#   1  wrong number of arguments, or undeclared tags were found
#   2  the lexicon file or a part of the test environment is missing

if [[ $# -ne 1 ]]; then
    echo "Usage: $0 lexicon.lexc"
    exit 1
fi
if [[ ! -f "$1" ]]; then
    echo "missing combined lexicon $1"
    exit 2
fi

# Get giella-core from the test environment:
giella_core=${GIELLA_CORE:-}

# Without the helper scripts both tag lists would silently end up empty and
# the comparison below would report a bogus PASS, so refuse to run instead.
if [[ -z "${giella_core}" ]]; then
    echo "GIELLA_CORE is not set, cannot locate the tag extraction scripts" >&2
    exit 2
fi
extract_used="${giella_core}/scripts/extract-used-tags.sh"
extract_defined="${giella_core}/scripts/extract-defined-tags.sh"
for helper in "${extract_used}" "${extract_defined}"; do
    if [[ ! -f "${helper}" || ! -x "${helper}" ]]; then
        echo "missing or non-executable helper script ${helper}" >&2
        exit 2
    fi
done

# Temporary files are registered here and removed on every exit path.
tmpfiles=()
cleanup() {
    if (( ${#tmpfiles[@]} > 0 )); then
        rm -f -- "${tmpfiles[@]}"
    fi
}
trap cleanup EXIT

if ! lexctags=$(mktemp -t giella-tag_test.lexc.XXXXXXXXXXX); then
    echo "could not create temporary file for used tags" >&2
    exit 2
fi
tmpfiles+=("${lexctags}")
if ! roottags=$(mktemp -t giella-tag_test.root.XXXXXXXXXXX); then
    echo "could not create temporary file for defined tags" >&2
    exit 2
fi
tmpfiles+=("${roottags}")

# sort and comm are run under the same locale on purpose: comm expects its
# input to be ordered according to the collation it is itself running with.

# Extract USED tags:
sed -e '1,/LEXICON Root/d' < "$1" |   # Extract all lines after LEXICON Root
    "${extract_used}" |
    LC_ALL=no_NO.UTF8 sort -u \
    > "${lexctags}"

# Extract DEFINED tags:
sed -n '/LEXICON Root/q;p' < "$1" |   # Extract all lines before LEXICON Root
    "${extract_defined}" |
    LC_ALL=no_NO.UTF8 sort -u \
    > "${roottags}"

# Compare the two sets of tags, report and fail if there is a diff.
# comm -23 keeps only the lines unique to the first file, i.e. tags that are
# used but not defined.
check=$(LC_ALL=no_NO.UTF8 comm -23 "${lexctags}" "${roottags}")
if [[ -n "${check}" ]]; then
    echo "FAIL: Have a look at these:"
    printf '%s\n' "${check}"
    exit 1
else
    echo "PASS: No errors found."
fi

0 comments on commit 8f04acf

Please sign in to comment.