From b8a33e00bd70a3e2c6b0e73e371207654c40a7b7 Mon Sep 17 00:00:00 2001 From: Dave Bauman Date: Thu, 20 Jun 2024 12:50:18 -0400 Subject: [PATCH] fix: PR comments --- .../server/datasources/query_execution.py | 11 +- .../server/datasources/query_transform.py | 22 +++ .../server/lib/query_analysis/transform.py | 20 ++- .../test_query_analysis/test_transform.py | 45 ++++++ .../lib/sql-helper/sql-limiter.test.ts | 128 +----------------- .../DataDocQueryCell/DataDocQueryCell.tsx | 5 +- .../QueryComposer/QueryComposer.tsx | 6 +- .../components/QueryComposer/RunQuery.tsx | 30 ++-- querybook/webapp/const/queryTransform.ts | 4 + .../webapp/lib/sql-helper/sql-limiter.ts | 56 -------- .../webapp/redux/queryExecutions/action.ts | 2 - querybook/webapp/resource/queryExecution.ts | 8 +- querybook/webapp/resource/queryTransform.ts | 13 ++ 13 files changed, 118 insertions(+), 232 deletions(-) create mode 100644 querybook/webapp/const/queryTransform.ts diff --git a/querybook/server/datasources/query_execution.py b/querybook/server/datasources/query_execution.py index 42b30d792..8c3515cdb 100644 --- a/querybook/server/datasources/query_execution.py +++ b/querybook/server/datasources/query_execution.py @@ -13,7 +13,6 @@ ) from clients.common import FileDoesNotExist from lib.export.all_exporters import ALL_EXPORTERS, get_exporter -from lib.query_analysis.transform import transform_to_limited_query from lib.result_store import GenericReader from lib.query_analysis.templating import ( QueryTemplatingError, @@ -62,18 +61,10 @@ @register("/query_execution/", methods=["POST"]) -def create_query_execution( - query, engine_id, row_limit=-1, data_cell_id=None, originator=None -): +def create_query_execution(query, engine_id, data_cell_id=None, originator=None): with DBSession() as session: verify_query_engine_permission(engine_id, session=session) - row_limit_enabled = admin_logic.get_engine_feature_param( - engine_id, "row_limit", False, session=session - ) - if row_limit_enabled and row_limit >= 0: - query = transform_to_limited_query(query, row_limit) - uid = current_user.id query_execution = logic.create_query_execution( query=query, engine_id=engine_id, uid=uid, session=session diff --git a/querybook/server/datasources/query_transform.py b/querybook/server/datasources/query_transform.py index df2f0b44a..6f501dc1b 100644 --- a/querybook/server/datasources/query_transform.py +++ b/querybook/server/datasources/query_transform.py @@ -1,8 +1,30 @@ from app.datasource import register +from lib.logger import get_logger from lib.query_analysis.transform import ( + has_query_contains_unlimited_select, + transform_to_limited_query, transform_to_sampled_query, ) +LOG = get_logger(__file__) + + +@register("/query/transform/limited/", methods=["POST"]) +def query_limited( + query: str, + row_limit: int, + language: str, +): + limited_query = transform_to_limited_query( + query=query, limit=row_limit, language=language + ) + + unlimited_select = has_query_contains_unlimited_select( + query=limited_query, language=language + ) + + return {"query": limited_query, "unlimited_select": unlimited_select} + @register("/query/transform/sampling/", methods=["POST"]) def query_sampling( diff --git a/querybook/server/lib/query_analysis/transform.py b/querybook/server/lib/query_analysis/transform.py index 4299a6fa5..cc6147439 100644 --- a/querybook/server/lib/query_analysis/transform.py +++ b/querybook/server/lib/query_analysis/transform.py @@ -67,15 +67,23 @@ def get_limited_select_statement(statement_ast: exp.Expression, limit: int): return statement_ast.limit(limit) -def has_query_contains_unlimited_select(query: str, language: str) -> bool: +def has_query_contains_unlimited_select(query: str, language: str = None): """Check if a query contains a select statement without a limit. Args: query: The query to check Returns: - bool: True if the query contains a select statement without a limit, False otherwise + str: The first select statement without a limit. None if all select statements have a limit. """ - statements = parse(query, dialect=_get_sqlglot_dialect[language]) - return any(get_select_statement_limit(s) == -1 for s in statements) + dialect = _get_sqlglot_dialect(language) + statements = parse(query, dialect) + return next( + ( + s.sql(dialect=dialect, pretty=True) + for s in statements + if get_select_statement_limit(s) == -1 + ), + None, + ) def transform_to_limited_query( @@ -83,8 +91,10 @@ def transform_to_limited_query( ) -> str: """Apply a limit to all select statements in a query if they don't already have a limit. It returns a new query with the limit applied and the original query is not modified. + + If limit is None or negative, the query is returned as-is. """ - if not limit: + if not limit or limit < 0: return query try: diff --git a/querybook/tests/test_lib/test_query_analysis/test_transform.py b/querybook/tests/test_lib/test_query_analysis/test_transform.py index de6c8bfd1..06a647131 100644 --- a/querybook/tests/test_lib/test_query_analysis/test_transform.py +++ b/querybook/tests/test_lib/test_query_analysis/test_transform.py @@ -3,6 +3,7 @@ from lib.query_analysis.transform import ( format_query, get_select_statement_limit, + has_query_contains_unlimited_select, transform_to_limited_query, transform_to_sampled_query, ) @@ -46,6 +47,50 @@ def test_select_with_limit(self): self.assertEqual(get_select_statement_limit(query), expected) +class HasQueryContainsUnlimitedSelectTestCase(TestCase): + def test_select_limit(self): + tests = [ + "SELECT 1 LIMIT 10", + "SELECT * FROM table_1 WHERE field = 1 LIMIT 10", + "TRUNCATE TABLE table_1; SELECT * FROM table_1 WHERE field = 1 LIMIT 1000", + "SELECT * FROM table_1 WHERE field = 1 LIMIT 10; SELECT * FROM table_2 WHERE field = 1 LIMIT 1000", + ] + for query in tests: + with self.subTest(query=query): + self.assertIsNone(has_query_contains_unlimited_select(query)) + + def test_select_no_limit(self): + tests = [ + ("SELECT 1", "SELECT\n 1"), + ( + "SELECT * FROM table_1 WHERE field = 1", + "SELECT\n *\nFROM table_1\nWHERE\n field = 1", + ), + ("SELECT 1; SELECT 2", "SELECT\n 1"), + ( + "SELECT * FROM table_1 WHERE field = 1 LIMIT 10; SELECT * FROM table_1 WHERE field = 1", + "SELECT\n *\nFROM table_1\nWHERE\n field = 1", + ), + ] + for query, expected in tests: + with self.subTest(query=query): + self.assertEquals(has_query_contains_unlimited_select(query), expected) + + def test_not_select_statements(self): + tests = [ + "DELETE FROM table_1 WHERE field = 1", + "CREATE DATABASE IF NOT EXISTS db_1", + "CREATE TABLE table_1 (field1 INT)", + "TRUNCATE TABLE table_1", + "DROP TABLE IF EXISTS db.table1; CREATE TABLE db.table1", + "INSERT INTO table_1 (field1) VALUES (1)", + "UPDATE table_1 SET field1 = 1 WHERE field = 1", + ] + for query in tests: + with self.subTest(query=query): + self.assertIsNone(has_query_contains_unlimited_select(query)) + + class GetLimitedQueryTestCase(TestCase): def test_limit_is_not_specified(self): tests = [ diff --git a/querybook/webapp/__tests__/lib/sql-helper/sql-limiter.test.ts b/querybook/webapp/__tests__/lib/sql-helper/sql-limiter.test.ts index 54d4349e2..0da7de6fd 100644 --- a/querybook/webapp/__tests__/lib/sql-helper/sql-limiter.test.ts +++ b/querybook/webapp/__tests__/lib/sql-helper/sql-limiter.test.ts @@ -1,7 +1,4 @@ -import { - getLimitedQuery, - getSelectStatementLimit, -} from 'lib/sql-helper/sql-limiter'; +import { getSelectStatementLimit } from 'lib/sql-helper/sql-limiter'; describe('getSelectStatementLimit', () => { describe('when it is not a SELECT statement', () => { @@ -76,126 +73,3 @@ describe('getSelectStatementLimit', () => { }); }); }); - -describe('getLimitedQuery', () => { - describe('not limited', () => { - test('when rowLimit is not specified', () => { - const query = 'SELECT * FROM table_1 WHERE field = 1;'; - expect(getLimitedQuery(query)).toBe(query); - }); - test('when rowLimit is not specified for multiple queries', () => { - const query = ` - SELECT * FROM table_1 WHERE field = 1; - SELECT * FROM table_1 WHERE field = 1; - `; - expect(getLimitedQuery(query)).toBe(query); - }); - test('when running a select query with fetch', () => { - const query = - 'SELECT * FROM table_1 ORDER BY id FETCH FIRST 10 ROWS ONLY;'; - expect(getLimitedQuery(query, 100, 'trino')).toBe(query); - }); - test('when running a select query with offset and fetch', () => { - const query = - 'SELECT * FROM table_1 ORDER BY id OFFSET 10 FETCH NEXT 10 ROWS ONLY;'; - expect(getLimitedQuery(query, 100, 'trino')).toBe(query); - }); - test('when running a select query with nested query', () => { - const query = `select * from (select * from table limit 5) as x limit 10`; - expect(getLimitedQuery(query, 100, 'trino')).toBe(query); - }); - test('when running a select query with a where clause and a limit', () => { - const query = 'SELECT * FROM table_1 WHERE field = 1 LIMIT 1000;'; - expect(getLimitedQuery(query, 100, 'trino')).toBe(query); - }); - }); - describe('limited', () => { - test('when running a select query', () => { - const query = 'SELECT * FROM table_1'; - expect(getLimitedQuery(query, 10)).toBe(`${query} limit 10;`); - }); - test('when running a select query with a where clause and a group by and an order by', () => { - const query = - 'SELECT field, count(*) FROM table_1 WHERE deleted = false GROUP BY field ORDER BY field'; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with trailing semicolon', () => { - const query = 'SELECT * FROM table_1;'; - expect(getLimitedQuery(query, 10)).toBe( - 'SELECT * FROM table_1 limit 10;' - ); - }); - test('when running a select query with comments', () => { - const query = 'SELECT * FROM table_1 -- limit here'; - expect(getLimitedQuery(query, 10)).toBe( - 'SELECT * FROM table_1 limit 10;' - ); - }); - test('when running a select query with non-keyword limits', () => { - const query = `SELECT id, account, 'limit' FROM querybook2.limit ORDER by 'limit' ASC`; - expect(getLimitedQuery(query, 10)).toBe(`${query} limit 10;`); - }); - test('when running a multiple select queries', () => { - const query = `SELECT * FROM table_1; -SELECT col1, col2, FROM table2;`; - expect(getLimitedQuery(query, 10)).toBe( - `SELECT * FROM table_1 limit 10; -SELECT col1, col2, FROM table2 limit 10;` - ); - }); - test('when running a select query with a where clause', () => { - const query = 'SELECT * FROM table_1 WHERE field = 1'; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with a where clause and an order by', () => { - const query = - 'SELECT * FROM table_1 WHERE field = 1 ORDER BY field'; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with a where clause and a group by and an order by', () => { - const query = - 'SELECT field, count(*) FROM table_1 WHERE deleted = false GROUP BY field ORDER BY field'; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running two select queries with mixed limits', () => { - const query = `SELECT * FROM table_1; -SELECT col1, col2, FROM table2 LIMIT 300;`; - expect(getLimitedQuery(query, 10)) - .toBe(`SELECT * FROM table_1 limit 10; -SELECT col1, col2, FROM table2 LIMIT 300;`); - }); - test('when running multiple select queries with mixed limits', () => { - const query = `SELECT * FROM table_1; -SELECT col1, col2, FROM table2 LIMIT 300; -SELECT field, count(1) FROM table3 GROUP BY field`; - expect(getLimitedQuery(query, 10)) - .toBe(`SELECT * FROM table_1 limit 10; -SELECT col1, col2, FROM table2 LIMIT 300; -SELECT field, count(1) FROM table3 GROUP BY field limit 10;`); - }); - test('when running a select query with nested query', () => { - const query = `select * from (select * from table limit 5) as x`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with wrapped where', () => { - const query = `select * from table where (field = 1 and field2 = 2)`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with two nested queries', () => { - const query = `select * from (select * from table limit 5) as x outer join (select * from table2 limit 5) as y on x.id = y.id`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with two nested queries', () => { - const query = `select * from (select * from table limit 5) as x outer join (select * from table2 limit 5) as y on x.id = y.id`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with two union queries', () => { - const query = `select id, name from table_a union all select id, name from table_b where (deleted = false and active = true)`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - test('when running a select query with two nested union queries', () => { - const query = `(select id, name from table_a limit 10) union all (select id, name from table_b where (deleted = false and active = true))`; - expect(getLimitedQuery(query, 100)).toBe(`${query} limit 100;`); - }); - }); -}); diff --git a/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx b/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx index cc073880b..7946816c6 100644 --- a/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx +++ b/querybook/webapp/components/DataDocQueryCell/DataDocQueryCell.tsx @@ -477,7 +477,6 @@ class DataDocQueryCellComponent extends React.PureComponent { await this.props.createQueryExecution( query, engineId, - this.rowLimit, this.props.cellId ) ).id; @@ -549,7 +548,6 @@ class DataDocQueryCellComponent extends React.PureComponent { return this.props.createQueryExecution( renderedQuery, this.engineId, - this.rowLimit, this.props.cellId ); } @@ -1094,9 +1092,8 @@ function mapDispatchToProps(dispatch: Dispatch) { createQueryExecution: ( query: string, engineId: number, - rowLimit: number, cellId: number - ) => dispatch(createQueryExecution(query, engineId, rowLimit, cellId)), + ) => dispatch(createQueryExecution(query, engineId, cellId)), setTableSidebarId: (id: number) => dispatch(setSidebarTableId(id)), diff --git a/querybook/webapp/components/QueryComposer/QueryComposer.tsx b/querybook/webapp/components/QueryComposer/QueryComposer.tsx index 3fd4c7335..1223802c1 100644 --- a/querybook/webapp/components/QueryComposer/QueryComposer.tsx +++ b/querybook/webapp/components/QueryComposer/QueryComposer.tsx @@ -524,11 +524,7 @@ const QueryComposer: React.FC = () => { engine.id, async (query, engineId) => { const data = await dispatch( - queryExecutionsAction.createQueryExecution( - query, - engineId, - rowLimit - ) + queryExecutionsAction.createQueryExecution(query, engineId) ); return data.id; } diff --git a/querybook/webapp/components/QueryComposer/RunQuery.tsx b/querybook/webapp/components/QueryComposer/RunQuery.tsx index 8cf801b82..493514242 100644 --- a/querybook/webapp/components/QueryComposer/RunQuery.tsx +++ b/querybook/webapp/components/QueryComposer/RunQuery.tsx @@ -5,7 +5,6 @@ import { ISamplingTables, TDataDocMetaVariables } from 'const/datadoc'; import { IQueryEngine } from 'const/queryEngine'; import { sendConfirm } from 'lib/querybookUI'; import { getDroppedTables } from 'lib/sql-helper/sql-checker'; -import { hasQueryContainUnlimitedSelect } from 'lib/sql-helper/sql-limiter'; import { renderTemplatedQuery } from 'lib/templated-query'; import { Nullable } from 'lib/typescript'; import { formatError } from 'lib/utils/error'; @@ -43,9 +42,9 @@ export async function transformQuery( sampleRate ); - await checkUnlimitedQuery(sampledQuery, rowLimit, engine); + const limitedQuery = transformLimitedQuery(sampledQuery, rowLimit, engine); - return sampledQuery; + return limitedQuery; } export async function runQuery( @@ -80,31 +79,30 @@ async function transformTemplatedQuery( } } -async function checkUnlimitedQuery( +async function transformLimitedQuery( query: string, rowLimit: Nullable, engine: IQueryEngine ) { - if ( - !engine.feature_params?.row_limit || - (rowLimit != null && rowLimit >= 0) - ) { - return; + if (!engine.feature_params?.row_limit) { + return query; } - // query is unlimited but engine has row limit feature turned on - - const unlimitedSelectQuery = hasQueryContainUnlimitedSelect( + const { data } = await QueryTransformResource.getLimitedQuery( query, - engine.language + engine.language, + rowLimit ?? -1 ); + const { query: limitedQuery, unlimited_select: unlimitedSelectQuery } = + data; + if (!unlimitedSelectQuery) { - return; + return limitedQuery; } // Show a warning modal to let user confirm what they are doing - return new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { sendConfirm({ header: 'Your SELECT query is unbounded', message: ( @@ -127,7 +125,7 @@ async function checkUnlimitedQuery( ), - onConfirm: () => resolve(), + onConfirm: () => resolve(limitedQuery), onDismiss: () => reject(), confirmText: 'Run without LIMIT', }); diff --git a/querybook/webapp/const/queryTransform.ts b/querybook/webapp/const/queryTransform.ts new file mode 100644 index 000000000..4fee6aebf --- /dev/null +++ b/querybook/webapp/const/queryTransform.ts @@ -0,0 +1,4 @@ +export interface IQueryTransformLimited { + query: string; + unlimited_select: null | string; +} diff --git a/querybook/webapp/lib/sql-helper/sql-limiter.ts b/querybook/webapp/lib/sql-helper/sql-limiter.ts index 34a87df43..816329921 100644 --- a/querybook/webapp/lib/sql-helper/sql-limiter.ts +++ b/querybook/webapp/lib/sql-helper/sql-limiter.ts @@ -1,5 +1,4 @@ import { - getStatementsFromQuery, getStatementType, IToken, simpleParse, @@ -67,61 +66,6 @@ export function getSelectStatementLimit( return -1; } -/** - * Check if the query has any statements that is SELECT and does not have LIMIT - * If so, return the unlimited select statement, else, return null - * - * @param query - * @param language - */ -export function hasQueryContainUnlimitedSelect( - query: string, - language?: string -): string { - const statements = getStatementsFromQuery(query, language); - - return statements.find( - (statement) => getSelectStatementLimit(statement, language) === -1 - ); -} - -/** - * Automatically apply a limit to a query that does not already have a limit. - * - * @param {string} query - Query to be executed. - * @param {number} rowLimit - Number of rows to limit the query to. - * @param {string} language - Language of the query. - * @return {string} - Query with limit applied (if necessary). - */ -export function getLimitedQuery( - query: string, - rowLimit?: number, - language?: string -): string { - if (rowLimit == null) { - return query; - } - - const statements = getStatementsFromQuery(query, language); - - let addedLimit = false; - const updatedQuery = statements - .map((statement) => { - const existingLimit = getSelectStatementLimit(statement, language); - if (existingLimit == null || existingLimit >= 0) { - return statement + ';'; - } - - addedLimit = true; - return `${statement} limit ${rowLimit};`; - }) - .join('\n'); - - // If no limit was added, return the original query - // to avoid changing whitespace, etc. - return addedLimit ? updatedQuery : query; -} - // 10^1 to 10^5 export const ROW_LIMIT_SCALE = window.ROW_LIMIT_SCALE ?? [1, 2, 3, 4, 5].map((v) => Math.pow(10, v)); diff --git a/querybook/webapp/redux/queryExecutions/action.ts b/querybook/webapp/redux/queryExecutions/action.ts index 1fef726f9..d1e365504 100644 --- a/querybook/webapp/redux/queryExecutions/action.ts +++ b/querybook/webapp/redux/queryExecutions/action.ts @@ -378,7 +378,6 @@ export function pollQueryExecution( export function createQueryExecution( query: string, engineId?: number, - rowLimit?: number, cellId?: number ): ThunkResult> { return async (dispatch, getState) => { @@ -388,7 +387,6 @@ export function createQueryExecution( const { data: queryExecution } = await QueryExecutionResource.create( query, selectedEngineId, - rowLimit, cellId ); dispatch(receiveQueryExecution(queryExecution, cellId)); diff --git a/querybook/webapp/resource/queryExecution.ts b/querybook/webapp/resource/queryExecution.ts index 27b1adf0d..e595f077b 100644 --- a/querybook/webapp/resource/queryExecution.ts +++ b/querybook/webapp/resource/queryExecution.ts @@ -66,16 +66,10 @@ export const QueryExecutionResource = { environment_id: environmentId, }), - create: ( - query: string, - engineId: number, - rowLimit?: number, - cellId?: number - ) => { + create: (query: string, engineId: number, cellId?: number) => { const params = { query, engine_id: engineId, - row_limit: rowLimit, }; if (cellId != null) { diff --git a/querybook/webapp/resource/queryTransform.ts b/querybook/webapp/resource/queryTransform.ts index 064f66ab8..47a3e5fc9 100644 --- a/querybook/webapp/resource/queryTransform.ts +++ b/querybook/webapp/resource/queryTransform.ts @@ -1,6 +1,19 @@ +import { IQueryTransformLimited } from 'const/queryTransform'; import ds from 'lib/datasource'; export const QueryTransformResource = { + getLimitedQuery: (query: string, language: string, row_limit: number) => + ds.save( + '/query/transform/limited/', + { + query, + language, + row_limit, + }, + { + notifyOnError: false, + } + ), getSampledQuery: ( query: string, language: string,