From 34e97d2b77196e3c0a9180584efcb7d73988d1e1 Mon Sep 17 00:00:00 2001 From: Taylor Bantle Date: Fri, 22 Nov 2024 14:32:23 -0800 Subject: [PATCH] Add schema to StatsQualifier for schema databases --- enginetest/queries/stats_queries.go | 10 ++++---- memory/stats.go | 16 ++++++++++--- sql/analyzer/costed_index_scan.go | 21 ++++++++++++----- sql/analyzer/costed_index_scan_test.go | 2 +- sql/analyzer/indexed_joins.go | 7 +++++- sql/plan/histogram.go | 17 +++++++++----- sql/planbuilder/analyze.go | 8 +++---- sql/rowexec/other.go | 1 + sql/rowexec/other_iters.go | 3 ++- sql/statistics.go | 32 ++++++++++++++++++++++---- sql/stats/iter.go | 5 ++-- 11 files changed, 89 insertions(+), 33 deletions(-) diff --git a/enginetest/queries/stats_queries.go b/enginetest/queries/stats_queries.go index 8ad5909265..496d803237 100644 --- a/enginetest/queries/stats_queries.go +++ b/enginetest/queries/stats_queries.go @@ -35,7 +35,7 @@ var StatisticsQueries = []ScriptTest{ SkipResultCheckOnServerEngine: true, // the non-interface types are not identified over the wire result Query: "SELECT * FROM information_schema.column_statistics", Expected: []sql.Row{ - {"mydb", "t", "i", stats.NewStatistic(3, 3, 0, 24, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []sql.HistogramBucket{ + {"mydb", "t", "i", stats.NewStatistic(3, 3, 0, 24, time.Now(), sql.NewStatQualifier("mydb", "", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []sql.HistogramBucket{ stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(1)}, nil, nil), stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(2)}, nil, nil), stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(3)}, nil, nil), @@ -60,7 +60,7 @@ var StatisticsQueries = []ScriptTest{ SkipResultCheckOnServerEngine: true, // the non-interface types are not identified over the wire result Query: "SELECT * FROM information_schema.column_statistics", Expected: []sql.Row{ - {"mydb", "t", "i", stats.NewStatistic(40, 40, 1, 0, 
time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []sql.HistogramBucket{ + {"mydb", "t", "i", stats.NewStatistic(40, 40, 1, 0, time.Now(), sql.NewStatQualifier("mydb", "", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []sql.HistogramBucket{ stats.NewHistogramBucket(20, 20, 0, 1, sql.Row{float64(50)}, nil, nil), stats.NewHistogramBucket(20, 20, 0, 1, sql.Row{float64(80)}, nil, nil), }, sql.IndexClassDefault, nil), @@ -89,13 +89,13 @@ var StatisticsQueries = []ScriptTest{ SkipResultCheckOnServerEngine: true, // the non-interface types are not identified over the wire result Query: "SELECT * FROM information_schema.column_statistics", Expected: []sql.Row{ - {"mydb", "t", "i", stats.NewStatistic(3, 3, 0, 48, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []sql.HistogramBucket{ + {"mydb", "t", "i", stats.NewStatistic(3, 3, 0, 48, time.Now(), sql.NewStatQualifier("mydb", "", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []sql.HistogramBucket{ stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(1)}, nil, []sql.Row{}), stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(2)}, nil, []sql.Row{}), stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(3)}, nil, []sql.Row{}), }, sql.IndexClassDefault, nil), }, - {"mydb", "t", "j", stats.NewStatistic(3, 3, 0, 48, time.Now(), sql.NewStatQualifier("mydb", "t", "j"), []string{"j"}, []sql.Type{types.Int64}, []sql.HistogramBucket{ + {"mydb", "t", "j", stats.NewStatistic(3, 3, 0, 48, time.Now(), sql.NewStatQualifier("mydb", "", "t", "j"), []string{"j"}, []sql.Type{types.Int64}, []sql.HistogramBucket{ stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(4)}, nil, []sql.Row{}), stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(5)}, nil, []sql.Row{}), stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(6)}, nil, []sql.Row{}), @@ -117,7 +117,7 @@ var StatisticsQueries = []ScriptTest{ SkipResultCheckOnServerEngine: true, // the 
non-interface types are not identified over the wire result Query: "SELECT * FROM information_schema.column_statistics", Expected: []sql.Row{ - {"mydb", "t", "i", stats.NewStatistic(4, 4, 0, 32, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Float64}, []sql.HistogramBucket{ + {"mydb", "t", "i", stats.NewStatistic(4, 4, 0, 32, time.Now(), sql.NewStatQualifier("mydb", "", "t", "primary"), []string{"i"}, []sql.Type{types.Float64}, []sql.HistogramBucket{ stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(1.25)}, nil, []sql.Row{}), stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(7.5)}, nil, []sql.Row{}), stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(10.5)}, nil, []sql.Row{}), diff --git a/memory/stats.go b/memory/stats.go index 9896e7cf92..4d92aab43d 100644 --- a/memory/stats.go +++ b/memory/stats.go @@ -156,7 +156,17 @@ func (s *StatsProv) estimateStats(ctx *sql.Context, table sql.Table, keys map[st types = append(types, sch[i].Type) } - qual, err := sql.NewQualifierFromString(string(key)) + var schemaName string + if tabSch, ok := table.(sql.DatabaseSchemaTable); ok { + schemaName = tabSch.DatabaseSchema().SchemaName() + } + + var qual sql.StatQualifier + if schemaName == "" { + qual, err = sql.NewQualifierFromString(string(key)) + } else { + qual, err = sql.NewSchemaQualifierFromString(string(key)) // table has a schema: parse the key with its schema part + } if err != nil { return err } @@ -260,7 +270,7 @@ func (s *StatsProv) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []st func (s *StatsProv) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error { colsSuff := strings.Join(cols, ",") + ")" - for key, _ := range s.colStats { + for key := range s.colStats { if strings.HasPrefix(string(key), qual.String()) && strings.HasSuffix(string(key), colsSuff) { delete(s.colStats, key) } @@ -295,7 +305,7 @@ func (s *StatsProv) DataLength(ctx *sql.Context, db string, table sql.Table) (ui } func (s *StatsProv) DropDbStats(ctx *sql.Context, db string, flush 
bool) error { - for key, _ := range s.colStats { + for key := range s.colStats { if strings.HasPrefix(string(key), db) { delete(s.colStats, key) } diff --git a/sql/analyzer/costed_index_scan.go b/sql/analyzer/costed_index_scan.go index ee47e8380a..3f1c51e639 100644 --- a/sql/analyzer/costed_index_scan.go +++ b/sql/analyzer/costed_index_scan.go @@ -173,12 +173,17 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta if dbTab, ok := rt.UnderlyingTable().(sql.Databaseable); ok { dbName = strings.ToLower(dbTab.Database()) } - tableName := strings.ToLower(rt.UnderlyingTable().Name()) + table := rt.UnderlyingTable() + var schemaName string + if schTab, ok := table.(sql.DatabaseSchemaTable); ok { + schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) + } + tableName := strings.ToLower(table.Name()) if len(qualToStat) > 0 { // don't mix and match real and default stats for _, idx := range indexes { - qual := sql.NewStatQualifier(dbName, tableName, strings.ToLower(idx.ID())) + qual := sql.NewStatQualifier(dbName, schemaName, tableName, strings.ToLower(idx.ID())) _, ok := qualToStat[qual] if !ok { qualToStat = nil @@ -188,7 +193,7 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta } for _, idx := range indexes { - qual := sql.NewStatQualifier(dbName, tableName, strings.ToLower(idx.ID())) + qual := sql.NewStatQualifier(dbName, schemaName, tableName, strings.ToLower(idx.ID())) stat, ok := qualToStat[qual] if !ok { stat, err = uniformDistStatisticsForIndex(ctx, statsProv, iat, idx) @@ -1536,6 +1541,10 @@ func uniformDistStatisticsForIndex(ctx *sql.Context, statsProv sql.StatsProvider if dbTable, ok := iat.(sql.Databaseable); ok { dbName = strings.ToLower(dbTable.Database()) } + var schemaName string + if schTab, ok := iat.(sql.DatabaseSchemaTable); ok { + schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) + } tableName := strings.ToLower(iat.Name()) var sch sql.Schema @@ -1545,7 +1554,7 
@@ func uniformDistStatisticsForIndex(ctx *sql.Context, statsProv sql.StatsProvider sch = iat.Schema() } - return newUniformDistStatistic(dbName, tableName, sch, idx, rowCount, avgSize) + return newUniformDistStatistic(dbName, schemaName, tableName, sch, idx, rowCount, avgSize) } func indexFds(tableName string, sch sql.Schema, idx sql.Index) (*sql.FuncDepSet, sql.ColSet, error) { @@ -1588,7 +1597,7 @@ func indexFds(tableName string, sch sql.Schema, idx sql.Index) (*sql.FuncDepSet, return sql.NewTablescanFDs(all, strictKeys, laxKeys, notNull), idxCols, nil } -func newUniformDistStatistic(dbName, tableName string, sch sql.Schema, idx sql.Index, rowCount, avgSize uint64) (sql.Statistic, error) { +func newUniformDistStatistic(dbName, schemaName, tableName string, sch sql.Schema, idx sql.Index, rowCount, avgSize uint64) (sql.Statistic, error) { tablePrefix := fmt.Sprintf("%s.", tableName) distinctCount := rowCount @@ -1615,7 +1624,7 @@ func newUniformDistStatistic(dbName, tableName string, sch sql.Schema, idx sql.I class = sql.IndexClassDefault } - qual := sql.NewStatQualifier(dbName, tableName, strings.ToLower(idx.ID())) + qual := sql.NewStatQualifier(dbName, schemaName, tableName, strings.ToLower(idx.ID())) stat := stats.NewStatistic(rowCount, distinctCount, nullCount, avgSize, time.Now(), qual, cols, types, nil, class, nil) fds, idxCols, err := indexFds(tableName, sch, idx) diff --git a/sql/analyzer/costed_index_scan_test.go b/sql/analyzer/costed_index_scan_test.go index bda4196da4..c1f09daf0b 100644 --- a/sql/analyzer/costed_index_scan_test.go +++ b/sql/analyzer/costed_index_scan_test.go @@ -583,7 +583,7 @@ func TestRangeBuilder(t *testing.T) { idx = idx_1 } - stat, err := newUniformDistStatistic("mydb", testTable, sch, idx, 10, 10) + stat, err := newUniformDistStatistic("mydb", "", testTable, sch, idx, 10, 10) require.NoError(t, err) err = c.cost(root, stat, idx) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index e57cf6662a..f350763a37 
100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -1344,7 +1344,12 @@ func makeIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, tab plan.Table for _, e := range idx.SqlIdx().Expressions() { cols = append(cols, strings.TrimPrefix(e, tablePrefix)) } - stats, _ := statsProv.GetStats(ctx, sql.NewStatQualifier(tn.Database().Name(), tn.Name(), idx.SqlIdx().ID()), cols) + var schemaName string + if schTab, ok := tn.(sql.DatabaseSchemaTable); ok { + schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) + } + + stats, _ := statsProv.GetStats(ctx, sql.NewStatQualifier(tn.Database().Name(), schemaName, tn.Name(), idx.SqlIdx().ID()), cols) return &memo.IndexScan{ Table: ret, diff --git a/sql/plan/histogram.go b/sql/plan/histogram.go index adb1ab9b71..3ee1b6dcc9 100644 --- a/sql/plan/histogram.go +++ b/sql/plan/histogram.go @@ -79,15 +79,16 @@ func (u *UpdateHistogram) IsReadOnly() bool { return false } -func NewDropHistogram(db, table string, cols []string) *DropHistogram { - return &DropHistogram{db: db, cols: cols, table: table} +func NewDropHistogram(db, schema, table string, cols []string) *DropHistogram { + return &DropHistogram{db: db, schema: schema, cols: cols, table: table} } type DropHistogram struct { - db string - table string - cols []string - prov sql.StatsProvider + db string + schema string + table string + cols []string + prov sql.StatsProvider } var _ sql.Node = (*DropHistogram)(nil) @@ -106,6 +107,10 @@ func (d *DropHistogram) Db() string { return d.db } +func (d *DropHistogram) SchemaName() string { + return d.schema +} + func (d *DropHistogram) Table() string { return d.table } diff --git a/sql/planbuilder/analyze.go b/sql/planbuilder/analyze.go index c5fe5d50de..e336c1ca5c 100644 --- a/sql/planbuilder/analyze.go +++ b/sql/planbuilder/analyze.go @@ -70,7 +70,7 @@ func (b *Builder) buildAnalyze(inScope *scope, n *ast.Analyze, query string) (ou switch n.Action { case ast.UpdateStr: sch := 
tableScope.node.Schema() - return b.buildAnalyzeUpdate(inScope, n, strings.ToLower(n.Tables[0].DbQualifier.String()), strings.ToLower(n.Tables[0].Name.String()), sch, columns, types) + return b.buildAnalyzeUpdate(inScope, n, strings.ToLower(n.Tables[0].DbQualifier.String()), strings.ToLower(n.Tables[0].SchemaQualifier.String()), strings.ToLower(n.Tables[0].Name.String()), sch, columns, types) case ast.DropStr: outScope = inScope.push() dbName := n.Tables[0].DbQualifier.String() @@ -81,7 +81,7 @@ func (b *Builder) buildAnalyze(inScope *scope, n *ast.Analyze, query string) (ou b.handleErr(sql.ErrNoDatabaseSelected.New()) } - outScope.node = plan.NewDropHistogram(strings.ToLower(dbName), strings.ToLower(n.Tables[0].Name.String()), columns).WithProvider(b.cat) + outScope.node = plan.NewDropHistogram(strings.ToLower(dbName), strings.ToLower(n.Tables[0].SchemaQualifier.String()), strings.ToLower(n.Tables[0].Name.String()), columns).WithProvider(b.cat) default: err := fmt.Errorf("invalid ANALYZE action: %s, expected UPDATE or DROP", n.Action) b.handleErr(err) @@ -117,7 +117,7 @@ func (b *Builder) buildAnalyzeTables(inScope *scope, n *ast.Analyze, query strin return } -func (b *Builder) buildAnalyzeUpdate(inScope *scope, n *ast.Analyze, dbName, tableName string, sch sql.Schema, columns []string, types []sql.Type) (outScope *scope) { +func (b *Builder) buildAnalyzeUpdate(inScope *scope, n *ast.Analyze, dbName, schemaName, tableName string, sch sql.Schema, columns []string, types []sql.Type) (outScope *scope) { if dbName == "" { dbName = b.ctx.GetCurrentDatabase() } @@ -155,7 +155,7 @@ func (b *Builder) buildAnalyzeUpdate(inScope *scope, n *ast.Analyze, dbName, tab statistic := statisticJ.ToStatistic() - statistic.SetQualifier(sql.NewStatQualifier(strings.ToLower(dbName), tableName, strings.ToLower(indexName))) + statistic.SetQualifier(sql.NewStatQualifier(strings.ToLower(dbName), strings.ToLower(schemaName), tableName, strings.ToLower(indexName))) 
statistic.SetColumns(columns) statistic.SetTypes(types) diff --git a/sql/rowexec/other.go b/sql/rowexec/other.go index b9ac346399..8c0ccbf628 100644 --- a/sql/rowexec/other.go +++ b/sql/rowexec/other.go @@ -333,6 +333,7 @@ func (b *BaseBuilder) buildDropHistogram(ctx *sql.Context, n *plan.DropHistogram return &dropHistogramIter{ db: n.Db(), + schema: n.SchemaName(), table: n.Table(), columns: n.Cols(), prov: n.StatsProvider(), diff --git a/sql/rowexec/other_iters.go b/sql/rowexec/other_iters.go index 0aa30cf124..387684a51f 100644 --- a/sql/rowexec/other_iters.go +++ b/sql/rowexec/other_iters.go @@ -84,6 +84,7 @@ func (itr *updateHistogramIter) Close(_ *sql.Context) error { type dropHistogramIter struct { db string + schema string table string columns []string prov sql.StatsProvider @@ -99,7 +100,7 @@ func (itr *dropHistogramIter) Next(ctx *sql.Context) (sql.Row, error) { defer func() { itr.done = true }() - qual := sql.NewStatQualifier(itr.db, itr.table, "") + qual := sql.NewStatQualifier(itr.db, itr.schema, itr.table, "") err := itr.prov.DropStats(ctx, qual, itr.columns) if err != nil { return sql.Row{itr.table, "histogram", "error", err.Error()}, nil diff --git a/sql/statistics.go b/sql/statistics.go index 833b992661..c3524abe07 100644 --- a/sql/statistics.go +++ b/sql/statistics.go @@ -91,6 +91,7 @@ type MutableStatistic interface { WithLowerBound(Row) Statistic } +// NewQualifierFromString creates a new StatQualifier from a string. 
func NewQualifierFromString(q string) (StatQualifier, error) { parts := strings.Split(q, ".") if len(parts) < 3 { @@ -99,23 +100,42 @@ func NewQualifierFromString(q string) (StatQualifier, error) { return StatQualifier{Database: parts[0], Tab: parts[1], Idx: parts[2]}, nil } -func NewStatQualifier(db, table, index string) StatQualifier { - return StatQualifier{Database: strings.ToLower(db), Tab: strings.ToLower(table), Idx: strings.ToLower(index)} +// NewSchemaQualifierFromString creates a new StatQualifier from a string, +// assuming the string contains a schema part. +func NewSchemaQualifierFromString(q string) (StatQualifier, error) { + parts := strings.Split(q, ".") + if len(parts) < 4 { + return StatQualifier{}, fmt.Errorf("invalid qualifier string: '%s', expected '<database>.<schema>.<table>.<index>'", q) + } + return StatQualifier{Database: parts[0], Sch: parts[1], Tab: parts[2], Idx: parts[3]}, nil +} + +func NewStatQualifier(db, schema, table, index string) StatQualifier { + return StatQualifier{ + Database: strings.ToLower(db), + Sch: strings.ToLower(schema), + Tab: strings.ToLower(table), + Idx: strings.ToLower(index)} } // StatQualifier is the namespace hierarchy for a given statistic. // The qualifier and set of columns completely describes a unique stat. 
type StatQualifier struct { Database string `json:"database"` + Sch string `json:"schema"` Tab string `json:"table"` Idx string `json:"index"` } func (q StatQualifier) String() string { + tableName := q.Tab + if q.Sch != "" { + tableName = fmt.Sprintf("%s.%s", q.Sch, q.Tab) + } if q.Idx != "" { - return fmt.Sprintf("%s.%s.%s", q.Database, q.Tab, q.Idx) + return fmt.Sprintf("%s.%s.%s", q.Database, tableName, q.Idx) } - return fmt.Sprintf("%s.%s", q.Database, q.Tab) + return fmt.Sprintf("%s.%s", q.Database, tableName) } func (q StatQualifier) Empty() bool { @@ -126,6 +146,10 @@ func (q StatQualifier) Db() string { return q.Database } +func (q StatQualifier) Schema() string { + return q.Sch +} + func (q StatQualifier) Table() string { return q.Tab } diff --git a/sql/stats/iter.go b/sql/stats/iter.go index 1123b2db0d..2b84289964 100644 --- a/sql/stats/iter.go +++ b/sql/stats/iter.go @@ -59,7 +59,8 @@ func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) { } dStat := s.dStats[s.i] - if s.j >= len(dStat.Histogram()) { + hist := dStat.Histogram() + if s.j >= len(hist) { s.i++ s.j = 0 continue @@ -67,7 +68,7 @@ func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) { currentJ := s.j s.j++ - return s.bucketToRow(currentJ, dStat.Histogram()[currentJ]) + return s.bucketToRow(currentJ, hist[currentJ]) } }