Skip to content

Commit

Permalink
add tl8-new, refactor common code into internal/tl8
Browse files Browse the repository at this point in the history
  • Loading branch information
stapelberg committed Jan 28, 2021
1 parent 6f6b1c6 commit 2d56c27
Show file tree
Hide file tree
Showing 5 changed files with 268 additions and 146 deletions.
147 changes: 8 additions & 139 deletions cmd/tl8-flag/tl8flag.go
Original file line number Diff line number Diff line change
@@ -1,150 +1,19 @@
package main

import (
"bytes"
"flag"
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"translation/internal/tl8"

"github.com/google/go-cmp/cmp"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
)

// Data model produced by segment: a document is an ordered list of headings,
// each of which owns the source lines that follow it.
type (
	// heading describes one Markdown heading and the translation-related
	// attributes attached to it.
	heading struct {
		// Line is the 1-based line number of the heading within the source.
		Line int
		// ID is the value of the heading's "id" attribute.
		ID string
		// Translated is the value of the heading's "translated" attribute,
		// or the empty string if the attribute is absent.
		Translated string
	}

	// section is a heading together with the source lines that follow it,
	// up to (but not including) the next heading.
	section struct {
		Heading heading
		Lines   []string
	}

	// document is the result of segmenting one Markdown file.
	document struct {
		// Version is the value of the "version" attribute found on a heading.
		Version string

		// sections and headings are kept in document order; the ByID maps
		// index the same values by heading ID.
		sections     []section
		sectionsByID map[string]section
		headings     []heading
		headingsByID map[string]heading
	}
)

// segment parses source as Markdown and splits it into sections, one per
// heading.
//
// For every heading it records the 1-based line number and the values of the
// "id" and "translated" attributes; a "version" attribute on any heading is
// stored as the document version. Each section holds the source lines that
// follow its heading, up to (but not including) the next heading.
//
// Headings without an id attribute are all stored under the empty key in the
// ByID maps, so later ones overwrite earlier ones there; the ordered slices
// still contain every heading.
//
// An error is returned if a heading cannot be mapped back to a source line.
func segment(source []byte) (*document, error) {
	// TODO: de-duplicate these goldmark.New() calls into an internal/ package
	md := goldmark.New(
		// GFM is GitHub Flavored Markdown, which we need for tables, for
		// example.
		goldmark.WithExtensions(extension.GFM),
		goldmark.WithParserOptions(
			parser.WithAutoHeadingID(),
			// The Attribute option allows us to set id, classes, and
			// arbitrary options on headings (for translation status).
			parser.WithAttribute(),
		),
		goldmark.WithRendererOptions(
			html.WithHardWraps(),
			html.WithXHTML(),
		),
	)
	// Not named "parser", so the imported parser package is not shadowed.
	root := md.Parser().Parse(text.NewReader(source))

	// modeled after (go/token).File:
	// lineoffsets contains the offset of the first character for each line
	// (the first entry is always 0).
	var lineoffsets []int
	processed := 0
	for {
		lineoffsets = append(lineoffsets, processed)
		idx := bytes.IndexByte(source[processed:], '\n')
		if idx == -1 {
			break
		}
		processed += idx + 1
	}

	doc := &document{}

	var headings []heading
	headingsByID := make(map[string]heading)
	err := ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
		if !entering || n.Kind() != ast.KindHeading {
			return ast.WalkContinue, nil
		}
		var h heading
		for _, attr := range n.Attributes() {
			b, ok := attr.Value.([]byte)
			if !ok {
				continue
			}
			val := string(b)
			switch string(attr.Name) {
			case "id":
				h.ID = val
			case "translated":
				h.Translated = val
			case "version":
				doc.Version = val
			}
		}
		// NOTE: a heading without an id is deliberately tolerated here.

		segments := n.Lines()
		if segments.Len() == 0 {
			// Without a source segment there is no position to map back to
			// a line number; report it instead of panicking in At(0).
			return ast.WalkStop, fmt.Errorf("heading %q has no source lines", h.ID)
		}
		first := segments.At(0)
		// Find the last line whose starting offset is <= first.Start.
		line := sort.Search(len(lineoffsets), func(i int) bool {
			return lineoffsets[i] > first.Start
		}) - 1
		if line < 0 {
			return ast.WalkStop, fmt.Errorf("BUG: could not find line offset for position %d", first.Start)
		}
		h.Line = line + 1
		headings = append(headings, h)
		headingsByID[h.ID] = h
		return ast.WalkContinue, nil
	})
	if err != nil {
		return nil, err
	}

	var sections []section
	sectionsByID := make(map[string]section)
	// Split the document into lines, then segment the lines into sections based
	// on the headers.
	lines := strings.Split(string(source), "\n")
	// If the source ends in a newline, strings.Split yields a trailing empty
	// element which is not a real line. Only drop it in that case: dropping
	// the last element unconditionally loses the final line of files without
	// a trailing newline, and panics when that final line is a heading.
	lastEnd := len(lines)
	if lines[lastEnd-1] == "" {
		lastEnd--
	}
	for idx, h := range headings {
		end := lastEnd
		if idx < len(headings)-1 {
			end = headings[idx+1].Line - 1
		}
		if end < h.Line {
			// Defensive: never hand an inverted range to the slice expression.
			end = h.Line
		}
		s := section{
			Heading: h,
			Lines:   lines[h.Line:end],
		}
		sectionsByID[h.ID] = s
		sections = append(sections, s)
	}

	doc.sections = sections
	doc.headings = headings
	doc.headingsByID = headingsByID
	doc.sectionsByID = sectionsByID
	return doc, nil
}

// fn is e.g. userguide.markdown
func flag1(fn, oldPath string) error {
path, err := filepath.Abs(fn)
Expand All @@ -155,7 +24,7 @@ func flag1(fn, oldPath string) error {
if err != nil {
return err
}
current, err := segment(currentSource)
current, err := tl8.Segment(currentSource)
if err != nil {
return err
}
Expand All @@ -164,14 +33,14 @@ func flag1(fn, oldPath string) error {
if err != nil {
return err
}
old, err := segment(oldSource)
old, err := tl8.Segment(oldSource)
if err != nil {
return err
}

unchanged := make(map[string]bool)
for _, current := range current.sections {
old, ok := old.sectionsByID[current.Heading.ID]
for _, current := range current.Sections {
old, ok := old.SectionsByID[current.Heading.ID]
if !ok {
log.Printf("BUG: section %q not found in -old_path=%s", current.Heading.ID, oldPath)
continue
Expand Down Expand Up @@ -203,17 +72,17 @@ func flag1(fn, oldPath string) error {
}
lines := strings.Split(string(b), "\n")
log.Printf("processing translation %s", translationPath)
translation, err := segment(b)
translation, err := tl8.Segment(b)
if err != nil {
return err
}
for _, heading := range translation.headings {
for _, heading := range translation.Headings {
if unchanged[heading.ID] && heading.Translated != "" {
log.Printf(" updating heading %q (up-to-date)", heading.ID)
lines[heading.Line-1] = translatedRe.ReplaceAllString(lines[heading.Line-1], `translated="`+current.Version+`"`)
}
}
documentHeading := translation.headings[0]
documentHeading := translation.Headings[0]
lines[documentHeading.Line-1] = versionRe.ReplaceAllString(lines[documentHeading.Line-1], `version="`+current.Version+`"`)
if err := ioutil.WriteFile(translationPath, []byte(strings.Join(lines, "\n")), 0644); err != nil {
return err
Expand Down
15 changes: 8 additions & 7 deletions cmd/tl8-flag/tl8flag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"os"
"path/filepath"
"testing"
"translation/internal/tl8"

"github.com/google/go-cmp/cmp"
)
Expand All @@ -18,30 +19,30 @@ which spans multiple lines.
## first heading {#first translated="4_18"}
`)
doc, err := segment(source)
doc, err := tl8.Segment(source)
if err != nil {
t.Fatal(err)
}

headingDocument := heading{
headingDocument := tl8.Heading{
Line: 1,
ID: "document",
Translated: "",
}
headingFirst := heading{
headingFirst := tl8.Heading{
Line: 6,
ID: "first",
Translated: "4_18",
}
wantHeadings := []heading{
wantHeadings := []tl8.Heading{
headingDocument,
headingFirst,
}
if diff := cmp.Diff(wantHeadings, doc.headings); diff != "" {
if diff := cmp.Diff(wantHeadings, doc.Headings); diff != "" {
t.Errorf("unexpected headings: diff (-want +got):\n%s", diff)
}

wantSections := []section{
wantSections := []tl8.Section{
{
Heading: headingDocument,
Lines: []string{
Expand All @@ -56,7 +57,7 @@ which spans multiple lines.
Lines: []string{},
},
}
if diff := cmp.Diff(wantSections, doc.sections); diff != "" {
if diff := cmp.Diff(wantSections, doc.Sections); diff != "" {
t.Errorf("unexpected sections: diff (-want +got):\n%s", diff)
}
}
Expand Down
54 changes: 54 additions & 0 deletions cmd/tl8-new/tl8new.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package main

import (
"flag"
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"strings"
"translation/internal/tl8"
)

// tl8new1 prepares the markdown file fn for translation: it adds a
// translated="TODO" marker to the attribute block of every heading except the
// first one (the document title) and writes the result back to fn.
//
// It returns an error, and leaves the file untouched, if the file cannot be
// read or segmented, or if any non-title heading line already contains a
// translated= marker.
func tl8new1(fn string) error {
	source, err := ioutil.ReadFile(fn)
	if err != nil {
		return err
	}

	doc, err := tl8.Segment(source)
	if err != nil {
		return fmt.Errorf("segmenting %s: %w", fn, err)
	}

	lines := strings.Split(string(source), "\n")
	for idx, heading := range doc.Headings {
		if idx == 0 {
			continue // skip document title heading
		}
		line := lines[heading.Line-1]
		if strings.Contains(line, "translated=") {
			return fmt.Errorf("document already contains translated= markers")
		}
		lines[heading.Line-1] = markTranslatedTODO(line)
	}

	return ioutil.WriteFile(fn, []byte(strings.Join(lines, "\n")), 0644)
}

// markTranslatedTODO inserts ` translated="TODO"` in front of the last closing
// brace of line. The last brace is used because the attribute block sits at
// the end of the heading line, so a "}" inside the heading title must not be
// touched. Lines without any closing brace are returned unchanged.
//
// NOTE(review): a heading whose title contains "}" but which has no attribute
// block at all is still modified inside its title — confirm whether such
// headings can occur in the translated documents.
func markTranslatedTODO(line string) string {
	idx := strings.LastIndex(line, "}")
	if idx == -1 {
		return line
	}
	return line[:idx] + ` translated="TODO"` + line[idx:]
}

// tl8new parses the command line and runs tl8new1 on the single markdown file
// named there. Any other number of positional arguments yields a usage error.
func tl8new() error {
	flag.Parse()

	args := flag.Args()
	if len(args) == 1 {
		return tl8new1(args[0])
	}

	prog := filepath.Base(os.Args[0])
	return fmt.Errorf("syntax: %s <markdown-file>", prog)
}

// main runs the tool and terminates with a logged fatal error if it fails.
func main() {
	err := tl8new()
	if err == nil {
		return
	}
	log.Fatal(err)
}
56 changes: 56 additions & 0 deletions cmd/tl8-new/tl8new_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package main

import (
"io/ioutil"
"path/filepath"
"testing"

"github.com/google/go-cmp/cmp"
)

// TestNew writes a small markdown document to a temporary file, runs tl8new1
// on it and verifies that every heading except the document title gained a
// translated="TODO" marker while all other lines stayed untouched.
func TestNew(t *testing.T) {
	const input = `# document {version="4_18"}
Introduction.
## first heading {#first}
Old explanation.
## second heading {#second}
Unchanged explanation.
`
	const want = `# document {version="4_18"}
Introduction.
## first heading {#first translated="TODO"}
Old explanation.
## second heading {#second translated="TODO"}
Unchanged explanation.
`

	path := filepath.Join(t.TempDir(), "userguide.markdown")
	if err := ioutil.WriteFile(path, []byte(input), 0644); err != nil {
		t.Fatal(err)
	}

	if err := tl8new1(path); err != nil {
		t.Fatal(err)
	}

	got, err := ioutil.ReadFile(path)
	if err != nil {
		t.Fatal(err)
	}

	// Compare as byte slices so the failure output matches the file contents.
	diff := cmp.Diff([]byte(want), got)
	if diff != "" {
		t.Errorf("unexpected new translation update: diff (-want +got):\n%s", diff)
	}
}
Loading

0 comments on commit 2d56c27

Please sign in to comment.