From 849a56f0404a71ee236baf9820a5b7eb7fb8f0d6 Mon Sep 17 00:00:00 2001 From: gengjun-git Date: Fri, 29 Nov 2024 10:39:46 +0800 Subject: [PATCH] [BugFix] Fix the issue where FE restart fails when creating a table containing too many tablets (#53062) ## Why I'm doing: Failures in serialization of log data should be thrown instead of ignored. Ignoring the error will write an empty log to bdb, causing FE startup failure. ## What I'm doing: Fix ``` 2024-11-20 14:28:03.900+08:00 ERROR (stateChangeExecutor|79) [BDBJournalCursor.deserializeData():253] fail to read journal entity key=13159, data= java.io.EOFException: null at java.io.DataInputStream.readInt(DataInputStream.java:397) ~[?:?] at com.starrocks.common.io.Text.readString(Text.java:391) ~[starrocks-fe.jar:?] at com.starrocks.journal.JournalEntity.readFields(JournalEntity.java:249) ~[starrocks-fe.jar:?] at com.starrocks.journal.bdbje.BDBJournalCursor.deserializeData(BDBJournalCursor.java:248) ~[starrocks-fe.jar:?] at com.starrocks.journal.bdbje.BDBJournalCursor.next(BDBJournalCursor.java:292) ~[starrocks-fe.jar:?] at com.starrocks.server.GlobalStateMgr.replayJournalInner(GlobalStateMgr.java:1933) ~[starrocks-fe.jar:?] at com.starrocks.server.GlobalStateMgr.replayJournal(GlobalStateMgr.java:1892) ~[starrocks-fe.jar:?] at com.starrocks.server.GlobalStateMgr.transferToLeader(GlobalStateMgr.java:1221) ~[starrocks-fe.jar:?] at com.starrocks.server.GlobalStateMgr$1.transferToLeader(GlobalStateMgr.java:745) ~[starrocks-fe.jar:?] at com.starrocks.ha.StateChangeExecutor.runOneCycle(StateChangeExecutor.java:103) ~[starrocks-fe.jar:?] at com.starrocks.common.util.Daemon.run(Daemon.java:107) ~[starrocks-fe.jar:?] 2024-11-20 14:28:03.919+08:00 WARN (stateChangeExecutor|79) [GlobalStateMgr.replayJournalInner():1954] catch exception when replaying journal, id: 13159, data: null, com.starrocks.journal.JournalException: fail to read journal entity key=13159, data= at com.starrocks.journal.bdbje.BDBJournalCursor.deserializeData(BDBJournalCursor.java:254) ~[starrocks-fe.jar:?] at com.starrocks.journal.bdbje.BDBJournalCursor.next(BDBJournalCursor.java:292) ~[starrocks-fe.jar:?] at com.starrocks.server.GlobalStateMgr.replayJournalInner(GlobalStateMgr.java:1933) ~[starrocks-fe.jar:?] at com.starrocks.server.GlobalStateMgr.replayJournal(GlobalStateMgr.java:1892) ~[starrocks-fe.jar:?] at com.starrocks.server.GlobalStateMgr.transferToLeader(GlobalStateMgr.java:1221) ~[starrocks-fe.jar:?] at com.starrocks.server.GlobalStateMgr$1.transferToLeader(GlobalStateMgr.java:745) ~[starrocks-fe.jar:?] at com.starrocks.ha.StateChangeExecutor.runOneCycle(StateChangeExecutor.java:103) ~[starrocks-fe.jar:?] at com.starrocks.common.util.Daemon.run(Daemon.java:107) ~[starrocks-fe.jar:?] Caused by: java.io.EOFException at java.io.DataInputStream.readInt(DataInputStream.java:397) ~[?:?] at com.starrocks.common.io.Text.readString(Text.java:391) ~[starrocks-fe.jar:?] at com.starrocks.journal.JournalEntity.readFields(JournalEntity.java:249) ~[starrocks-fe.jar:?] at com.starrocks.journal.bdbje.BDBJournalCursor.deserializeData(BDBJournalCursor.java:248) ~[starrocks-fe.jar:?] ``` Signed-off-by: gengjun-git (cherry picked from commit eb61f0753b6ac2ae8afeedc84f85306f4499e195) --- .../java/com/starrocks/catalog/Database.java | 11 ++++++ .../starrocks/journal/SerializeException.java | 21 ++++++++++ .../java/com/starrocks/persist/EditLog.java | 7 +++- .../com/starrocks/server/LocalMetastore.java | 5 +++ .../starrocks/server/LocalMetaStoreTest.java | 15 ++++++- .../server/SerializeFailedTable.java | 39 +++++++++++++++++++ 6 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 fe/fe-core/src/main/java/com/starrocks/journal/SerializeException.java create mode 100644 fe/fe-core/src/test/java/com/starrocks/server/SerializeFailedTable.java diff --git a/fe/fe-core/src/main/java/com/starrocks/catalog/Database.java b/fe/fe-core/src/main/java/com/starrocks/catalog/Database.java index 28caa7e9edf96..f1b4a516773d4 100644 --- a/fe/fe-core/src/main/java/com/starrocks/catalog/Database.java +++ b/fe/fe-core/src/main/java/com/starrocks/catalog/Database.java @@ -312,6 +312,17 @@ public boolean registerTableUnlocked(Table table) { return true; } + public void unRegisterTableUnlocked(Table table) { + if (table == null) { + return; + } + + idToTable.remove(table.getId()); + if (!table.isTemporaryTable()) { + nameToTable.remove(table.getName()); + } + } + public void dropTable(String tableName, boolean isSetIfExists, boolean isForce) throws DdlException { Table table; Locker locker = new Locker(); diff --git a/fe/fe-core/src/main/java/com/starrocks/journal/SerializeException.java b/fe/fe-core/src/main/java/com/starrocks/journal/SerializeException.java new file mode 100644 index 0000000000000..06389c0c552a3 --- /dev/null +++ b/fe/fe-core/src/main/java/com/starrocks/journal/SerializeException.java @@ -0,0 +1,21 @@ +// Copyright 2021-present StarRocks, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.starrocks.journal; + +public class SerializeException extends RuntimeException { + public SerializeException(String message) { + super(message); + } +} diff --git a/fe/fe-core/src/main/java/com/starrocks/persist/EditLog.java b/fe/fe-core/src/main/java/com/starrocks/persist/EditLog.java index ac919f88d9a0c..e51ed1c4b22ac 100644 --- a/fe/fe-core/src/main/java/com/starrocks/persist/EditLog.java +++ b/fe/fe-core/src/main/java/com/starrocks/persist/EditLog.java @@ -36,6 +36,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.gson.JsonParseException; import com.starrocks.alter.AlterJobV2; import com.starrocks.alter.BatchAlterJobPersistInfo; import com.starrocks.authentication.UserAuthenticationInfo; @@ -63,6 +64,7 @@ import com.starrocks.journal.JournalEntity; import com.starrocks.journal.JournalInconsistentException; import com.starrocks.journal.JournalTask; +import com.starrocks.journal.SerializeException; import com.starrocks.journal.bdbje.Timestamp; import com.starrocks.load.DeleteMgr; import com.starrocks.load.ExportFailMsg; @@ -1150,9 +1152,10 @@ private JournalTask submitLog(short op, Writable writable, long maxWaitIntervalM entity.setOpCode(op); entity.setData(writable); entity.write(buffer); - } catch (IOException e) { + } catch (IOException | JsonParseException e) { // The old implementation swallow exception like this - LOG.info("failed to serialize, ", e); + LOG.info("failed to serialize journal data", e); + throw new SerializeException("failed to serialize journal data"); } JournalTask task = new JournalTask(startTimeNano, buffer, maxWaitIntervalMs); diff --git a/fe/fe-core/src/main/java/com/starrocks/server/LocalMetastore.java b/fe/fe-core/src/main/java/com/starrocks/server/LocalMetastore.java index 3e14046b8fe2f..3fd5b0a620bdd 100644 --- a/fe/fe-core/src/main/java/com/starrocks/server/LocalMetastore.java +++ b/fe/fe-core/src/main/java/com/starrocks/server/LocalMetastore.java @@ -118,6 +118,7 @@ import com.starrocks.common.util.concurrent.lock.Locker; import com.starrocks.connector.ConnectorMetadata; import com.starrocks.connector.exception.StarRocksConnectorException; +import com.starrocks.journal.SerializeException; import com.starrocks.lake.DataCacheInfo; import com.starrocks.lake.LakeMaterializedView; import com.starrocks.lake.LakeTable; @@ -1905,6 +1906,10 @@ public void onCreate(Database db, Table table, String storageVolumeId, boolean i CreateTableInfo createTableInfo = new CreateTableInfo(db.getFullName(), table, storageVolumeId); GlobalStateMgr.getCurrentState().getEditLog().logCreateTable(createTableInfo); table.onCreate(db); + } catch (SerializeException e) { + db.unRegisterTableUnlocked(table); + LOG.warn("create table failed", e); + ErrorReport.reportDdlException(ErrorCode.ERR_CANT_CREATE_TABLE, table.getName(), e.getMessage()); } finally { locker.unLockDatabase(db.getId(), LockType.WRITE); } diff --git a/fe/fe-core/src/test/java/com/starrocks/server/LocalMetaStoreTest.java b/fe/fe-core/src/test/java/com/starrocks/server/LocalMetaStoreTest.java index 219721838e739..b1c2696280085 100644 --- a/fe/fe-core/src/test/java/com/starrocks/server/LocalMetaStoreTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/server/LocalMetaStoreTest.java @@ -72,7 +72,7 @@ public static void beforeClass() throws Exception { Config.alter_scheduler_interval_millisecond = 1000; FeConstants.runningUnitTest = true; - UtFrameUtils.createMinStarRocksCluster(); + UtFrameUtils.createMinStarRocksCluster(true, RunMode.SHARED_NOTHING); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); @@ -311,4 +311,17 @@ public void testRenameColumnException() throws Exception { } } + @Test + public void testCreateTableSerializeException() { + final long tableId = 1000010L; + final String tableName = "test"; + Database db = connectContext.getGlobalStateMgr().getLocalMetastore().getDb("test"); + LocalMetastore localMetastore = connectContext.getGlobalStateMgr().getLocalMetastore(); + SerializeFailedTable table = new SerializeFailedTable(1000010L, "serialize_test"); + + Assert.assertThrows(DdlException.class, () -> localMetastore.onCreate(db, table, "", true)); + + Assert.assertNull(db.getTable(tableId)); + Assert.assertNull(db.getTable(tableName)); + } } diff --git a/fe/fe-core/src/test/java/com/starrocks/server/SerializeFailedTable.java b/fe/fe-core/src/test/java/com/starrocks/server/SerializeFailedTable.java new file mode 100644 index 0000000000000..c0091cc44fdf5 --- /dev/null +++ b/fe/fe-core/src/test/java/com/starrocks/server/SerializeFailedTable.java @@ -0,0 +1,39 @@ +// Copyright 2021-present StarRocks, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.starrocks.server; + +import com.starrocks.catalog.Table; +import com.starrocks.persist.gson.GsonPreProcessable; + +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; + +public class SerializeFailedTable extends Table implements GsonPreProcessable { + + public SerializeFailedTable(long id, String name) { + super(id, name, TableType.OLAP, new ArrayList<>()); + } + + @Override + public void write(DataOutput out) throws IOException { + throw new IOException("failed"); + } + + @Override + public void gsonPreProcess() throws IOException { + throw new IOException("failed"); + } +}