Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
liujiayi771 committed Oct 14, 2024
1 parent 3c1774a commit 52ba1f6
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 25 deletions.
29 changes: 16 additions & 13 deletions velox/functions/sparksql/specialforms/SparkCastHooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,28 @@ SparkCastHooks::SparkCastHooks(const core::QueryConfig& config) : CastHooks() {
Expected<Timestamp> SparkCastHooks::castStringToTimestamp(
const StringView& view) const {
// Allows all patterns supported by Spark:
// `[+-]yyyy*` `[+-]yyyy*-[m]m` `[+-]yyyy*-[m]m-[d]d` `[+-]yyyy*-[m]m-[d]d `
// `[+-]yyyy*`
// `[+-]yyyy*-[m]m`
// `[+-]yyyy*-[m]m-[d]d`
// `[+-]yyyy*-[m]m-[d]d `
// `[+-]yyyy*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
// `[+-]yyyy*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
// `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
// `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
//
// where `zone_id` should have one of the forms:
// - Z - Zulu time zone UTC+0
// - +|-[h]h:[m]m
// - A short id, see
// 1. Z - Zulu time zone UTC+0
// 2. +|-[h]h:[m]m
// 3. A short id, see
// https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS
// - An id with one of the prefixes
// 4. An id with one of the prefixes
// UTC+, UTC-, GMT+, GMT-, UT+ or UT-, and a suffix in the
// formats:
// - +|-h[h]
// - +|-hh[:]mm
// - +|-hh:mm:ss
// - +|-hhmmss
// - Region-based zone IDs in the form `area/city`, such as `Europe/Paris`
// a. +|-h[h]
// b. +|-hh[:]mm
// c. +|-hh:mm:ss
// d. +|-hhmmss
// 5. Region-based zone IDs in the form `area/city`, such as `Europe/Paris`
const auto conversionResult = util::fromTimestampWithTimezoneString(
view.data(), view.size(), util::TimestampParseMode::kSparkCast);
if (conversionResult.hasError()) {
Expand All @@ -62,9 +65,9 @@ Expected<Timestamp> SparkCastHooks::castStringToTimestamp(
// GMT at that time.
result.first.toGMT(*result.second);
} else if (options_.timeZone != nullptr) {
// If no timezone information is available in the input string, check if we
// should understand it as being at the session timezone, and if so, convert
// to GMT.
// If the input string contains no timezone information, determine whether
// it should be interpreted as being in the session timezone and, if so,
// convert it to GMT.
result.first.toGMT(*options_.timeZone);
}
return result.first;
Expand Down
24 changes: 12 additions & 12 deletions velox/type/TimestampConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -827,15 +827,11 @@ Status parserError(const char* str, size_t len) {
std::string(str, len));
}

inline bool startsWith(std::string_view str, const char* prefix) {
return str.rfind(prefix, 0) == 0;
}

// Spark support an id with one of the prefixes UTC+, UTC-, GMT+, GMT-, UT+ or
// UT-, and a suffix in the formats: h[h], hh[:]mm, hh:mm:ss, hhmmss. This func
// will validate the format of the suffix.
// Additionally, IANA does not support seconds in the offset, so this func will
// add the seconds offset to the input Timestamp.
// UT-, and a suffix in the following formats: h[h], hh[:]mm, hh:mm:ss, hhmmss.
// This func will check that the suffix format is correct. Furthermore, this
// function adds the seconds offset to the input timestamp because
// IANA(https://www.iana.org/time-zones) does not support seconds in the offset.
bool parseSparkTzWithPrefix(
std::string& timeZoneName,
Timestamp& ts,
Expand Down Expand Up @@ -906,8 +902,12 @@ bool parseSparkTzWithPrefix(
}

bool normalizeSparkTimezone(std::string& timeZoneName, Timestamp& ts) {
// Spark support an id with one of the prefixes UTC+, UTC-, GMT+, GMT-, UT+ or
// UT-, and a suffix in the formats: h[h], hh[:]mm, hh:mm:ss, hhmmss
auto startsWith = [](std::string_view str, const char* prefix) {
return str.rfind(prefix, 0) == 0;
};
// Spark supports a timezone id with one of the prefixes UTC+, UTC-, GMT+,
// GMT-, UT+ or UT-, and a suffix in the following formats: h[h], hh[:]mm,
// hh:mm:ss, hhmmss.
if (startsWith(timeZoneName, "UTC") || startsWith(timeZoneName, "GMT")) {
if (!parseSparkTzWithPrefix(timeZoneName, ts, 3)) {
return false;
Expand Down Expand Up @@ -995,8 +995,8 @@ fromTimestampWithTimezoneString(
timezonePos++;
}

std::string timeZoneName(str + pos, timezonePos - pos);
std::string normalizeTzName = timeZoneName;
std::string_view timeZoneName(str + pos, timezonePos - pos);
std::string normalizeTzName = std::string{timeZoneName};
if (parseMode == TimestampParseMode::kSparkCast &&
!normalizeSparkTimezone(normalizeTzName, resultTimestamp)) {
return folly::makeUnexpected(Status::UserError(
Expand Down

0 comments on commit 52ba1f6

Please sign in to comment.