Skip to content

Commit

Permalink
add tl8-flag: flags changed sections in translations as out-of-date
Browse files Browse the repository at this point in the history
More precisely speaking, it updates unchanged sections to be marked as current.

related to i3/i3#4327
  • Loading branch information
stapelberg committed Jan 27, 2021
1 parent 3447c2c commit 6f6b1c6
Show file tree
Hide file tree
Showing 4 changed files with 397 additions and 0 deletions.
256 changes: 256 additions & 0 deletions cmd/tl8-flag/tl8flag.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
package main

import (
"bytes"
"flag"
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
"sort"
"strings"

"github.com/google/go-cmp/cmp"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
)

type heading struct {
Line int
ID string
Translated string
}

type section struct {
Heading heading
Lines []string
}

type document struct {
Version string
sections []section
sectionsByID map[string]section
headings []heading
headingsByID map[string]heading
}

func segment(source []byte) (*document, error) {
// TODO: de-duplicate these goldmark.New() calls into an internal/ package
md := goldmark.New(
// GFM is GitHub Flavored Markdown, which we need for tables, for
// example.
goldmark.WithExtensions(extension.GFM),
goldmark.WithParserOptions(
parser.WithAutoHeadingID(),
// The Attribute option allows us to id, classes, and arbitrary
// options on headings (for translation status).
parser.WithAttribute(),
),
goldmark.WithRendererOptions(
html.WithHardWraps(),
html.WithXHTML(),
),
)
parser := md.Parser()
rd := text.NewReader(source)
root := parser.Parse(rd)

// modeled after (go/token).File:
var lineoffsets []int // lines contains the offset of the first character for each line (the first entry is always 0)

processed := 0
for {
lineoffsets = append(lineoffsets, processed)
idx := bytes.IndexByte(source[processed:], '\n')
if idx == -1 {
break
}
processed += idx + 1
}

doc := &document{}

var headings []heading
headingsByID := make(map[string]heading)
err := ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkContinue, nil
}
if n.Kind() == ast.KindHeading {
var h heading
for _, attr := range n.Attributes() {
b, ok := attr.Value.([]byte)
if !ok {
continue
}
val := string(b)
switch string(attr.Name) {
case "id":
h.ID = val
case "translated":
h.Translated = val
case "version":
doc.Version = val
}
}
if h.ID == "" {
//return ast.WalkStop, fmt.Errorf("heading does not have id")
}
segments := n.Lines()
first := segments.At(0)
line := sort.Search(len(lineoffsets), func(i int) bool {
return lineoffsets[i] > first.Start
}) - 1
if line < 0 {
return ast.WalkStop, fmt.Errorf("BUG: could not find line offset for position %d", first.Start)
}
h.Line = line + 1
headings = append(headings, h)
headingsByID[h.ID] = h
}
return ast.WalkContinue, nil
})
if err != nil {
return nil, err
}

var sections []section
sectionsByID := make(map[string]section)
// Split the document into lines, then segment the lines into sections based
// on the headers.
lines := strings.Split(string(source), "\n")
for idx, h := range headings {
end := len(lines) - 1
if idx < len(headings)-1 {
end = headings[idx+1].Line - 1
}
s := section{
Heading: h,
Lines: lines[h.Line:end],
}
sectionsByID[h.ID] = s
sections = append(sections, s)
}

doc.sections = sections
doc.headings = headings
doc.headingsByID = headingsByID
doc.sectionsByID = sectionsByID
return doc, nil
}

// fn is e.g. userguide.markdown
func flag1(fn, oldPath string) error {
path, err := filepath.Abs(fn)
if err != nil {
return err
}
currentSource, err := ioutil.ReadFile(path)
if err != nil {
return err
}
current, err := segment(currentSource)
if err != nil {
return err
}

oldSource, err := ioutil.ReadFile(oldPath)
if err != nil {
return err
}
old, err := segment(oldSource)
if err != nil {
return err
}

unchanged := make(map[string]bool)
for _, current := range current.sections {
old, ok := old.sectionsByID[current.Heading.ID]
if !ok {
log.Printf("BUG: section %q not found in -old_path=%s", current.Heading.ID, oldPath)
continue
}
diff := cmp.Diff(old.Lines, current.Lines)
changed := diff != ""
unchanged[current.Heading.ID] = !changed
if changed {
log.Printf("changed (-old +current):\n%s", diff)
}
}

dir := filepath.Dir(path)
fis, err := ioutil.ReadDir(dir)
if err != nil {
return err
}
for _, fi := range fis {
if !fi.Mode().IsDir() || fi.Name() == "." || fi.Name() == ".." {
continue
}
translationPath := filepath.Join(dir, fi.Name(), filepath.Base(fn))
b, err := ioutil.ReadFile(translationPath)
if err != nil {
if !os.IsNotExist(err) {
log.Print(err)
}
continue
}
lines := strings.Split(string(b), "\n")
log.Printf("processing translation %s", translationPath)
translation, err := segment(b)
if err != nil {
return err
}
for _, heading := range translation.headings {
if unchanged[heading.ID] && heading.Translated != "" {
log.Printf(" updating heading %q (up-to-date)", heading.ID)
lines[heading.Line-1] = translatedRe.ReplaceAllString(lines[heading.Line-1], `translated="`+current.Version+`"`)
}
}
documentHeading := translation.headings[0]
lines[documentHeading.Line-1] = versionRe.ReplaceAllString(lines[documentHeading.Line-1], `version="`+current.Version+`"`)
if err := ioutil.WriteFile(translationPath, []byte(strings.Join(lines, "\n")), 0644); err != nil {
return err
}
}

return nil
}

var (
translatedRe = regexp.MustCompile(`translated="([^"]+)"`)
versionRe = regexp.MustCompile(`version="([^"]+)"`)
)

func tl8flag() error {
var (
oldPath = flag.String("old_path",
"",
"old version of the document")
)
flag.Parse()
if flag.NArg() != 1 {
return fmt.Errorf("syntax: %s <markdown-file>", filepath.Base(os.Args[0]))
}
if *oldPath == "" {
return fmt.Errorf("-old_path is required")
}
fn := flag.Arg(0)
if err := flag1(fn, *oldPath); err != nil {
return err
}

return nil
}

func main() {
if err := tl8flag(); err != nil {
log.Fatal(err)
}
}
137 changes: 137 additions & 0 deletions cmd/tl8-flag/tl8flag_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package main

import (
"bytes"
"io/ioutil"
"os"
"path/filepath"
"testing"

"github.com/google/go-cmp/cmp"
)

func TestSegment(t *testing.T) {
source := []byte(`# document
A paragraph,
which spans multiple lines.
## first heading {#first translated="4_18"}
`)
doc, err := segment(source)
if err != nil {
t.Fatal(err)
}

headingDocument := heading{
Line: 1,
ID: "document",
Translated: "",
}
headingFirst := heading{
Line: 6,
ID: "first",
Translated: "4_18",
}
wantHeadings := []heading{
headingDocument,
headingFirst,
}
if diff := cmp.Diff(wantHeadings, doc.headings); diff != "" {
t.Errorf("unexpected headings: diff (-want +got):\n%s", diff)
}

wantSections := []section{
{
Heading: headingDocument,
Lines: []string{
"",
"A paragraph,",
"which spans multiple lines.",
"",
},
},
{
Heading: headingFirst,
Lines: []string{},
},
}
if diff := cmp.Diff(wantSections, doc.sections); diff != "" {
t.Errorf("unexpected sections: diff (-want +got):\n%s", diff)
}
}

func TestFlag(t *testing.T) {
oldSource := []byte(`# document {version="4_18"}
Introduction.
## first heading {#first}
Old explanation.
## second heading {#second}
Unchanged explanation.
`)
frenchOldSource := []byte(`# document {version="4_18"}
Introduction.
## premier titre {#first translated="4_18"}
Ancienne explication.
## deuxième rubrique {#second translated="4_18"}
Explication inchangée.
`)

tmp := t.TempDir()
newSource := bytes.ReplaceAll(oldSource, []byte("Old"), []byte("New"))
newSource = bytes.ReplaceAll(newSource, []byte(`version="4_18"`), []byte(`version="4_19"`))
fn := filepath.Join(tmp, "userguide.markdown")
if err := ioutil.WriteFile(fn, newSource, 0644); err != nil {
t.Fatal(err)
}
frenchFn := filepath.Join(tmp, "fr", "userguide.markdown")
if err := os.MkdirAll(filepath.Dir(frenchFn), 0755); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(frenchFn, frenchOldSource, 0644); err != nil {
t.Fatal(err)
}

oldTmp := t.TempDir()
oldFn := filepath.Join(oldTmp, "userguide.markdown")
if err := ioutil.WriteFile(oldFn, oldSource, 0644); err != nil {
t.Fatal(err)
}

if err := flag1(fn, oldFn); err != nil {
t.Fatal(err)
}

updatedFrenchSource, err := ioutil.ReadFile(frenchFn)
if err != nil {
t.Fatal(err)
}
// document and second heading should be updated,
// first heading should not be updated (→ out of date)
wantFrenchSource := []byte(`# document {version="4_19"}
Introduction.
## premier titre {#first translated="4_18"}
Ancienne explication.
## deuxième rubrique {#second translated="4_19"}
Explication inchangée.
`)

if diff := cmp.Diff(wantFrenchSource, updatedFrenchSource); diff != "" {
t.Errorf("unexpected french translation update: diff (-want +got):\n%s", diff)
}
}
Loading

0 comments on commit 6f6b1c6

Please sign in to comment.