From 6f6b1c634e49d156296598533d7b74b1153672a8 Mon Sep 17 00:00:00 2001
From: Michael Stapelberg
Date: Wed, 27 Jan 2021 09:48:59 +0100
Subject: [PATCH] add tl8-flag: flags changed sections in translations as out-of-date

More precisely speaking, it updates unchanged sections to be marked as current.

related to https://github.com/i3/i3/issues/4327
---
Review notes (this section is ignored by git am):
- segment: documents without a trailing newline no longer lose their last
  line (and no longer panic when the last line is a heading); headings for
  which goldmark reports no lines are skipped instead of panicking.
- flag1: refuse a document without version attribute, skip translations
  without headings, replace attribute values literally.
- tl8flag: the syntax message now names the expected arguments.

 cmd/tl8-flag/tl8flag.go      | 277 +++++++++++++++++++++++++++++++++++
 cmd/tl8-flag/tl8flag_test.go | 137 +++++++++++++++++
 go.mod                       |   1 +
 go.sum                       |   3 +
 4 files changed, 418 insertions(+)
 create mode 100644 cmd/tl8-flag/tl8flag.go
 create mode 100644 cmd/tl8-flag/tl8flag_test.go

diff --git a/cmd/tl8-flag/tl8flag.go b/cmd/tl8-flag/tl8flag.go
new file mode 100644
index 0000000..2b1be9d
--- /dev/null
+++ b/cmd/tl8-flag/tl8flag.go
@@ -0,0 +1,277 @@
+package main
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strings"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/extension"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/renderer/html"
+	"github.com/yuin/goldmark/text"
+)
+
+// heading describes one markdown heading found by segment.
+type heading struct {
+	Line       int    // 1-based line number of the heading text
+	ID         string // id attribute (explicit or auto-generated)
+	Translated string // translated attribute, "" if absent
+}
+
+// section is a heading together with the lines following it, up to (but not
+// including) the next heading.
+type section struct {
+	Heading heading
+	Lines   []string
+}
+
+// document is the result of segmenting one markdown file.
+type document struct {
+	Version      string // last version attribute seen on any heading
+	sections     []section
+	sectionsByID map[string]section
+	headings     []heading
+	headingsByID map[string]heading
+}
+
+// segment parses the markdown in source and splits it into sections, one per
+// heading. Text before the first heading does not belong to any section.
+func segment(source []byte) (*document, error) {
+	// TODO: de-duplicate these goldmark.New() calls into an internal/ package
+	md := goldmark.New(
+		// GFM is GitHub Flavored Markdown, which we need for tables, for
+		// example.
+		goldmark.WithExtensions(extension.GFM),
+		goldmark.WithParserOptions(
+			parser.WithAutoHeadingID(),
+			// The Attribute option allows us to set id, classes, and
+			// arbitrary options on headings (for translation status).
+			parser.WithAttribute(),
+		),
+		goldmark.WithRendererOptions(
+			html.WithHardWraps(),
+			html.WithXHTML(),
+		),
+	)
+	root := md.Parser().Parse(text.NewReader(source))
+
+	// modeled after (go/token).File: lineoffsets contains the offset of the
+	// first character for each line (the first entry is always 0).
+	var lineoffsets []int
+	processed := 0
+	for {
+		lineoffsets = append(lineoffsets, processed)
+		idx := bytes.IndexByte(source[processed:], '\n')
+		if idx == -1 {
+			break
+		}
+		processed += idx + 1
+	}
+
+	doc := &document{}
+
+	var headings []heading
+	headingsByID := make(map[string]heading)
+	err := ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
+		if !entering || n.Kind() != ast.KindHeading {
+			return ast.WalkContinue, nil
+		}
+		var h heading
+		for _, attr := range n.Attributes() {
+			b, ok := attr.Value.([]byte)
+			if !ok {
+				continue
+			}
+			val := string(b)
+			switch string(attr.Name) {
+			case "id":
+				h.ID = val
+			case "translated":
+				h.Translated = val
+			case "version":
+				doc.Version = val
+			}
+		}
+		// TODO: consider rejecting headings which do not have an id.
+		segments := n.Lines()
+		if segments.Len() == 0 {
+			// Without source lines (goldmark may report none, e.g. for a
+			// heading without text) there is no position to map to a line
+			// number, and At(0) would panic. Skip such headings.
+			return ast.WalkContinue, nil
+		}
+		first := segments.At(0)
+		line := sort.Search(len(lineoffsets), func(i int) bool {
+			return lineoffsets[i] > first.Start
+		}) - 1
+		if line < 0 {
+			return ast.WalkStop, fmt.Errorf("BUG: could not find line offset for position %d", first.Start)
+		}
+		h.Line = line + 1
+		headings = append(headings, h)
+		headingsByID[h.ID] = h
+		return ast.WalkContinue, nil
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	var sections []section
+	sectionsByID := make(map[string]section)
+	// Split the document into lines, then segment the lines into sections based
+	// on the headers. The newline terminating the document is stripped first, so
+	// that it does not result in a trailing empty line, whereas the last line of
+	// a document which does not end in a newline is kept.
+	lines := strings.Split(strings.TrimSuffix(string(source), "\n"), "\n")
+	for idx, h := range headings {
+		end := len(lines)
+		if idx < len(headings)-1 {
+			end = headings[idx+1].Line - 1
+		}
+		s := section{
+			Heading: h,
+			Lines:   lines[h.Line:end],
+		}
+		sectionsByID[h.ID] = s
+		sections = append(sections, s)
+	}
+
+	doc.sections = sections
+	doc.headings = headings
+	doc.headingsByID = headingsByID
+	doc.sectionsByID = sectionsByID
+	return doc, nil
+}
+
+// flag1 compares each section of the document fn with the section of the same
+// id in the old version of the document at oldPath. In every translation of
+// the document (a file named like fn in a direct subdirectory of fn's
+// directory), it then sets the translated attribute of the headings of
+// unchanged sections to the version of fn, and updates the version attribute
+// on the line of the translation's first heading.
+//
+// fn is e.g. userguide.markdown
+func flag1(fn, oldPath string) error {
+	path, err := filepath.Abs(fn)
+	if err != nil {
+		return err
+	}
+	currentSource, err := ioutil.ReadFile(path)
+	if err != nil {
+		return err
+	}
+	current, err := segment(currentSource)
+	if err != nil {
+		return err
+	}
+	if current.Version == "" {
+		// Refuse to stamp translations with an empty version: the regular
+		// expressions below would no longer match the resulting attributes.
+		return fmt.Errorf("%s: no version attribute found on any heading", fn)
+	}
+
+	oldSource, err := ioutil.ReadFile(oldPath)
+	if err != nil {
+		return err
+	}
+	old, err := segment(oldSource)
+	if err != nil {
+		return err
+	}
+
+	unchanged := make(map[string]bool)
+	for _, cur := range current.sections {
+		prev, ok := old.sectionsByID[cur.Heading.ID]
+		if !ok {
+			log.Printf("BUG: section %q not found in -old_path=%s", cur.Heading.ID, oldPath)
+			continue
+		}
+		diff := cmp.Diff(prev.Lines, cur.Lines)
+		changed := diff != ""
+		unchanged[cur.Heading.ID] = !changed
+		if changed {
+			log.Printf("changed (-old +current):\n%s", diff)
+		}
+	}
+
+	dir := filepath.Dir(path)
+	fis, err := ioutil.ReadDir(dir)
+	if err != nil {
+		return err
+	}
+	for _, fi := range fis {
+		if !fi.Mode().IsDir() || fi.Name() == "." || fi.Name() == ".." {
+			continue
+		}
+		translationPath := filepath.Join(dir, fi.Name(), filepath.Base(fn))
+		b, err := ioutil.ReadFile(translationPath)
+		if err != nil {
+			if !os.IsNotExist(err) {
+				log.Print(err)
+			}
+			continue
+		}
+		lines := strings.Split(string(b), "\n")
+		log.Printf("processing translation %s", translationPath)
+		translation, err := segment(b)
+		if err != nil {
+			return err
+		}
+		if len(translation.headings) == 0 {
+			// There is no document heading whose version could be updated.
+			log.Printf("skipping translation %s: no headings found", translationPath)
+			continue
+		}
+		for _, h := range translation.headings {
+			if unchanged[h.ID] && h.Translated != "" {
+				log.Printf(" updating heading %q (up-to-date)", h.ID)
+				lines[h.Line-1] = translatedRe.ReplaceAllLiteralString(lines[h.Line-1], `translated="`+current.Version+`"`)
+			}
+		}
+		documentHeading := translation.headings[0]
+		lines[documentHeading.Line-1] = versionRe.ReplaceAllLiteralString(lines[documentHeading.Line-1], `version="`+current.Version+`"`)
+		if err := ioutil.WriteFile(translationPath, []byte(strings.Join(lines, "\n")), 0644); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// translatedRe and versionRe match the heading attributes which flag1
+// rewrites. Replacements are done with ReplaceAllLiteralString so that a "$"
+// in the version is not interpreted as a submatch reference.
+var (
+	translatedRe = regexp.MustCompile(`translated="([^"]+)"`)
+	versionRe    = regexp.MustCompile(`version="([^"]+)"`)
+)
+
+// tl8flag parses the command line and runs flag1 on the document named by the
+// only positional argument, using -old_path as the old version.
+func tl8flag() error {
+	oldPath := flag.String("old_path", "", "old version of the document")
+	flag.Parse()
+	if flag.NArg() != 1 {
+		return fmt.Errorf("syntax: %s -old_path=<old-document> <document>", filepath.Base(os.Args[0]))
+	}
+	if *oldPath == "" {
+		return fmt.Errorf("-old_path is required")
+	}
+	return flag1(flag.Arg(0), *oldPath)
+}
+
+func main() {
+	if err := tl8flag(); err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/cmd/tl8-flag/tl8flag_test.go b/cmd/tl8-flag/tl8flag_test.go
new file mode 100644
index 0000000..aa35def
--- /dev/null
+++ b/cmd/tl8-flag/tl8flag_test.go
@@ -0,0 +1,137 @@
+package main
+
+import (
+	"bytes"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestSegment(t *testing.T) {
+	source := []byte(`# document
+
+A paragraph,
+which spans multiple lines.
+
+## first heading {#first translated="4_18"}
+`)
+	doc, err := segment(source)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	headingDocument := heading{
+		Line:       1,
+		ID:         "document",
+		Translated: "",
+	}
+	headingFirst := heading{
+		Line:       6,
+		ID:         "first",
+		Translated: "4_18",
+	}
+	wantHeadings := []heading{
+		headingDocument,
+		headingFirst,
+	}
+	if diff := cmp.Diff(wantHeadings, doc.headings); diff != "" {
+		t.Errorf("unexpected headings: diff (-want +got):\n%s", diff)
+	}
+
+	wantSections := []section{
+		{
+			Heading: headingDocument,
+			Lines: []string{
+				"",
+				"A paragraph,",
+				"which spans multiple lines.",
+				"",
+			},
+		},
+		{
+			Heading: headingFirst,
+			Lines:   []string{},
+		},
+	}
+	if diff := cmp.Diff(wantSections, doc.sections); diff != "" {
+		t.Errorf("unexpected sections: diff (-want +got):\n%s", diff)
+	}
+}
+
+func TestFlag(t *testing.T) {
+	oldSource := []byte(`# document {version="4_18"}
+
+Introduction.
+
+## first heading {#first}
+
+Old explanation.
+
+## second heading {#second}
+
+Unchanged explanation.
+`)
+	frenchOldSource := []byte(`# document {version="4_18"}
+
+Introduction.
+
+## premier titre {#first translated="4_18"}
+
+Ancienne explication.
+
+## deuxième rubrique {#second translated="4_18"}
+
+Explication inchangée.
+`)
+
+	tmp := t.TempDir()
+	newSource := bytes.ReplaceAll(oldSource, []byte("Old"), []byte("New"))
+	newSource = bytes.ReplaceAll(newSource, []byte(`version="4_18"`), []byte(`version="4_19"`))
+	fn := filepath.Join(tmp, "userguide.markdown")
+	if err := ioutil.WriteFile(fn, newSource, 0644); err != nil {
+		t.Fatal(err)
+	}
+	frenchFn := filepath.Join(tmp, "fr", "userguide.markdown")
+	if err := os.MkdirAll(filepath.Dir(frenchFn), 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := ioutil.WriteFile(frenchFn, frenchOldSource, 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	oldTmp := t.TempDir()
+	oldFn := filepath.Join(oldTmp, "userguide.markdown")
+	if err := ioutil.WriteFile(oldFn, oldSource, 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := flag1(fn, oldFn); err != nil {
+		t.Fatal(err)
+	}
+
+	updatedFrenchSource, err := ioutil.ReadFile(frenchFn)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// document and second heading should be updated,
+	// first heading should not be updated (→ out of date)
+	wantFrenchSource := []byte(`# document {version="4_19"}
+
+Introduction.
+
+## premier titre {#first translated="4_18"}
+
+Ancienne explication.
+
+## deuxième rubrique {#second translated="4_19"}
+
+Explication inchangée.
+`)
+
+	if diff := cmp.Diff(wantFrenchSource, updatedFrenchSource); diff != "" {
+		t.Errorf("unexpected french translation update: diff (-want +got):\n%s", diff)
+	}
+}
diff --git a/go.mod b/go.mod
index 37fa9e4..ce17ef8 100644
--- a/go.mod
+++ b/go.mod
@@ -4,6 +4,7 @@ go 1.15
 
 require (
 	github.com/Kunde21/markdownfmt/v2 v2.0.3
+	github.com/google/go-cmp v0.5.4
 	github.com/google/renameio v1.0.0
 	github.com/yuin/goldmark v1.3.1
 )
diff --git a/go.sum b/go.sum
index e7f2ec0..da70dbb 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,7 @@
 github.com/Kunde21/markdownfmt/v2 v2.0.3 h1:8Xs3GX5hVK0+AiVTXb2xvr8zvrl8Z8DQXOqmA6sXhjk=
 github.com/Kunde21/markdownfmt/v2 v2.0.3/go.mod h1:50JNMOFTYtR8g1f+U8BZlw0M9RL5ZUqjOxxTgITeyrg=
+github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
+github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/renameio v1.0.0 h1:xhp2CnJmgQmpJU4RY8chagahUq5mbPPAbiSQstKpVMA=
 github.com/google/renameio v1.0.0/go.mod h1:t/HQoYBZSsWSNK35C6CO/TpPLDVWvxOHboWUAweKUpk=
 github.com/mattn/go-runewidth v0.0.3 h1:a+kO+98RDGEfo6asOGMmpodZq4FNtnGP54yps8BzLR4=
@@ -7,3 +9,4 @@ github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzp
 github.com/yuin/goldmark v1.1.24/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.3.1 h1:eVwehsLsZlCJCwXyGLgg+Q4iFWE/eTIMG0e8waCmm/I=
 github.com/yuin/goldmark v1.3.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=