Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add log-formatting to the otel-LogHandler #3673

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

- Retain meaningful logrecord attributes and apply log-message formatting
([#3673](https://github.com/open-telemetry/opentelemetry-python/pull/3673))
- Make `tracer.start_as_current_span()` decorator work with async functions
([#3633](https://github.com/open-telemetry/opentelemetry-python/pull/3633))
- Fix python 3.12 deprecation warning
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,7 @@ this repository and perform an [editable
install](https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs):

```sh
pip install -e ./opentelemetry-api
pip install -e ./opentelemetry-sdk
pip install -e ./opentelemetry-sdk/ -e ./opentelemetry-api/ -e ./opentelemetry-semantic-conventions/
pip install -e ./instrumentation/opentelemetry-instrumentation-{instrumentation}
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@

_DEFAULT_OTEL_ATTRIBUTE_COUNT_LIMIT = 128
_ENV_VALUE_UNSET = ""
_EXCLUDED_ATTRIBUTES = {
# pseudo-private log-record attributes, they just get dropped
"args",
"msg",
"message",
"stack_info",
"exc_info",
"exc_text",
Comment on lines +60 to +65
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we should still discard the attributes like taskName, asctime etc.. that don't have the mapping in sematic conventions. Not that they are not useful but we only emit the attributes that sem convs defined in SDK

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have an overly strong opinion on this, but sway towards emit all, or at the very least some of them.

I can't find the source right now, but I think I've read that OTEL implementations should prioritize being written idiomatic within the language the implementation is written in over sticking to details of the general OTEL guidelines. I'd consider log-record attributes to fall into this category. As a python developer, I expect attributes like lineno, module, and funcName to exist on a logrecord.

Also, custom attributes already do get emitted.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think language idiomatic implementation suggestions should interpreted to associate with the attributes. This is an independent area. We are emitting some of them already by mapping to semconv attributes. And will emit the rest too but that will be after there is some spec guidelines about such cases.

# attributes that are retained, but with a different name
# following semantic conventions
"pathname",
"funcName",
"lineno",
"thread",
"threadName",
a-recknagel marked this conversation as resolved.
Show resolved Hide resolved
}


class LogLimits:
Expand Down Expand Up @@ -403,37 +419,6 @@ def force_flush(self, timeout_millis: int = 30000) -> bool:
return True


# skip natural LogRecord attributes
# http://docs.python.org/library/logging.html#logrecord-attributes
_RESERVED_ATTRS = frozenset(
(
"asctime",
"args",
"created",
"exc_info",
"exc_text",
"filename",
"funcName",
"message",
"levelname",
"levelno",
"lineno",
"module",
"msecs",
"msg",
"name",
"pathname",
"process",
"processName",
"relativeCreated",
"stack_info",
"thread",
"threadName",
"taskName",
)
)


class LoggingHandler(logging.Handler):
"""A handler class which writes logging records, in OTLP format, to
a network destination or file. Supports signals from the `logging` module.
Expand All @@ -454,13 +439,18 @@ def __init__(
@staticmethod
def _get_attributes(record: logging.LogRecord) -> Attributes:
attributes = {
k: v for k, v in vars(record).items() if k not in _RESERVED_ATTRS
k: v
for k, v in vars(record).items()
if k not in _EXCLUDED_ATTRIBUTES and v is not None
}

# Add standard code attributes for logs.
attributes[SpanAttributes.CODE_FILEPATH] = record.pathname
attributes[SpanAttributes.CODE_FUNCTION] = record.funcName
attributes[SpanAttributes.CODE_LINENO] = record.lineno
# Add thread identifiers for logs.
attributes[SpanAttributes.THREAD_ID] = record.thread
attributes[SpanAttributes.THREAD_NAME] = record.threadName

if record.exc_info:
exctype, value, tb = record.exc_info
Expand All @@ -480,48 +470,8 @@ def _translate(self, record: logging.LogRecord) -> LogRecord:
observered_timestamp = time_ns()
span_context = get_current_span().get_span_context()
attributes = self._get_attributes(record)
# This comment is taken from GanyedeNil's PR #3343, I have redacted it
# slightly for clarity:
# According to the definition of the Body field type in the
# OTel 1.22.0 Logs Data Model article, the Body field should be of
# type 'any' and should not use the str method to directly translate
# the msg. This is because str only converts non-text types into a
# human-readable form, rather than a standard format, which leads to
# the need for additional operations when collected through a log
# collector.
# Considering that he Body field should be of type 'any' and should not
# use the str method but record.msg is also a string type, then the
# difference is just the self.args formatting?
# The primary consideration depends on the ultimate purpose of the log.
# Converting the default log directly into a string is acceptable as it
# will be required to be presented in a more readable format. However,
# this approach might not be as "standard" when hoping to aggregate
# logs and perform subsequent data analysis. In the context of log
# extraction, it would be more appropriate for the msg to be
# converted into JSON format or remain unchanged, as it will eventually
# be transformed into JSON. If the final output JSON data contains a
# structure that appears similar to JSON but is not, it may confuse
# users. This is particularly true for operation and maintenance
# personnel who need to deal with log data in various languages.
# Where is the JSON converting occur? and what about when the msg
# represents something else but JSON, the expected behavior change?
# For the ConsoleLogExporter, it performs the to_json operation in
# opentelemetry.sdk._logs._internal.export.ConsoleLogExporter.__init__,
# so it can handle any type of input without problems. As for the
# OTLPLogExporter, it also handles any type of input encoding in
# _encode_log located in
# opentelemetry.exporter.otlp.proto.common._internal._log_encoder.
# Therefore, no extra operation is needed to support this change.
# The only thing to consider is the users who have already been using
# this SDK. If they upgrade the SDK after this change, they will need
# to readjust their logging collection rules to adapt to the latest
# output format. Therefore, this change is considered a breaking
# change and needs to be upgraded at an appropriate time.
severity_number = std_to_otel(record.levelno)
if isinstance(record.msg, str) and record.args:
body = record.msg % record.args
else:
body = record.msg
body = self.format(record)

# related to https://github.com/open-telemetry/opentelemetry-python/issues/3548
# Severity Text = WARN as defined in https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md#displaying-severity.
Expand Down
4 changes: 2 additions & 2 deletions opentelemetry-sdk/tests/logs/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ def test_simple_log_record_processor_different_msg_types(self):
"Temperature hits high 420 C in Hyderabad",
"CRITICAL",
),
(["list", "of", "strings"], "WARN"),
({"key": "value"}, "ERROR"),
("['list', 'of', 'strings']", "WARN"),
("{'key': 'value'}", "ERROR"),
]
emitted = [
(item.log_record.body, item.log_record.severity_text)
Expand Down
78 changes: 74 additions & 4 deletions opentelemetry-sdk/tests/logs/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@
from opentelemetry.trace import INVALID_SPAN_CONTEXT


def get_logger(level=logging.NOTSET, logger_provider=None):
def get_logger(level=logging.NOTSET, logger_provider=None, formatter=None):
logger = logging.getLogger(__name__)
handler = LoggingHandler(level=level, logger_provider=logger_provider)
if formatter:
handler.setFormatter(formatter)
logger.addHandler(handler)
return logger

Expand Down Expand Up @@ -139,8 +141,10 @@ def test_log_record_user_attributes(self):
log_record = args[0]

self.assertIsNotNone(log_record)
self.assertEqual(len(log_record.attributes), 4)
self.assertEqual(log_record.attributes["http.status_code"], 200)
self.assertEqual(
log_record.attributes,
{**log_record.attributes, **{"http.status_code": 200}},
)
self.assertTrue(
log_record.attributes[SpanAttributes.CODE_FILEPATH].endswith(
"test_handler.py"
Expand Down Expand Up @@ -171,7 +175,7 @@ def test_log_record_exception(self):
log_record = args[0]

self.assertIsNotNone(log_record)
self.assertEqual(log_record.body, "Zero Division Error")
self.assertIn("Zero Division Error", log_record.body)
self.assertEqual(
log_record.attributes[SpanAttributes.EXCEPTION_TYPE],
ZeroDivisionError.__name__,
Expand Down Expand Up @@ -235,3 +239,69 @@ def test_log_record_trace_correlation(self):
self.assertEqual(log_record.trace_id, span_context.trace_id)
self.assertEqual(log_record.span_id, span_context.span_id)
self.assertEqual(log_record.trace_flags, span_context.trace_flags)

def test_log_record_args_are_translated(self):
emitter_provider_mock = Mock(spec=LoggerProvider)
emitter_mock = APIGetLogger(
__name__, logger_provider=emitter_provider_mock
)
logger = get_logger(logger_provider=emitter_provider_mock)

with self.assertLogs(level=logging.INFO):
logger.info("Test message")
args, _ = emitter_mock.emit.call_args_list[0]
log_record = args[0]

self.assertEqual(
set(log_record.attributes),
{
"code.filepath",
"code.function",
"code.lineno",
"created",
"filename",
"levelname",
"levelno",
"module",
"msecs",
"name",
"process",
"processName",
"relativeCreated",
"thread.id",
"thread.name",
},
)

def test_format_is_called(self):
emitter_provider_mock = Mock(spec=LoggerProvider)
emitter_mock = APIGetLogger(
__name__, logger_provider=emitter_provider_mock
)
formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
logger = get_logger(
logger_provider=emitter_provider_mock, formatter=formatter
)

with self.assertLogs(level=logging.INFO):
logger.info("Test message")
args, _ = emitter_mock.emit.call_args_list[0]
log_record = args[0]

self.assertEqual(
log_record.body, "tests.logs.test_handler - INFO - Test message"
)

def test_log_body_is_always_string(self):
emitter_provider_mock = Mock(spec=LoggerProvider)
emitter_mock = APIGetLogger(
__name__, logger_provider=emitter_provider_mock
)
logger = get_logger(logger_provider=emitter_provider_mock)

with self.assertLogs(level=logging.INFO):
logger.info(["something", "of", "note"])
args, _ = emitter_mock.emit.call_args_list[0]
log_record = args[0]

self.assertIsInstance(log_record.body, str)