open-telemetry · a-recknagel · Feb 5, 2024 · Feb 5, 2024 · Feb 5, 2024 · Feb 8, 2024
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+- Retain meaningful logrecord attributes and apply log-message formatting 
+ ([#3673](https://github.com/open-telemetry/opentelemetry-python/pull/3673))
 - Make `tracer.start_as_current_span()` decorator work with async functions
  ([#3633](https://github.com/open-telemetry/opentelemetry-python/pull/3633))
 - Fix python 3.12 deprecation warning

@@ -80,8 +80,7 @@ this repository and perform an [editable
 install](https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs):
 
 ```sh
-pip install -e ./opentelemetry-api
-pip install -e ./opentelemetry-sdk
+pip install -e ./opentelemetry-sdk/ -e ./opentelemetry-api/ -e ./opentelemetry-semantic-conventions/
 pip install -e ./instrumentation/opentelemetry-instrumentation-{instrumentation}
 ```
 

@@ -54,6 +54,22 @@
 
 _DEFAULT_OTEL_ATTRIBUTE_COUNT_LIMIT = 128
 _ENV_VALUE_UNSET = ""
+_EXCLUDED_ATTRIBUTES = {
+ # pseudo-private log-record attributes, they just get dropped
+ "args",
+ "msg",
+ "message",
+ "stack_info",
+ "exc_info",
+ "exc_text",
+ # attributes that are retained, but with a different name
+ # following semantic conventions
+ "pathname",
+ "funcName",
+ "lineno",
+ "thread",
+ "threadName",
+}
 
 
 class LogLimits:
@@ -403,37 +419,6 @@ def force_flush(self, timeout_millis: int = 30000) -> bool:
  return True
 
 
-# skip natural LogRecord attributes
-# http://docs.python.org/library/logging.html#logrecord-attributes
-_RESERVED_ATTRS = frozenset(
- (
- "asctime",
- "args",
- "created",
- "exc_info",
- "exc_text",
- "filename",
- "funcName",
- "message",
- "levelname",
- "levelno",
- "lineno",
- "module",
- "msecs",
- "msg",
- "name",
- "pathname",
- "process",
- "processName",
- "relativeCreated",
- "stack_info",
- "thread",
- "threadName",
- "taskName",
- )
-)
-
-
 class LoggingHandler(logging.Handler):
  """A handler class which writes logging records, in OTLP format, to
  a network destination or file. Supports signals from the `logging` module.
@@ -454,13 +439,18 @@ def __init__(
  @staticmethod
  def _get_attributes(record: logging.LogRecord) -> Attributes:
  attributes = {
- k: v for k, v in vars(record).items() if k not in _RESERVED_ATTRS
+ k: v
+ for k, v in vars(record).items()
+ if k not in _EXCLUDED_ATTRIBUTES and v is not None
  }
 
  # Add standard code attributes for logs.
  attributes[SpanAttributes.CODE_FILEPATH] = record.pathname
  attributes[SpanAttributes.CODE_FUNCTION] = record.funcName
  attributes[SpanAttributes.CODE_LINENO] = record.lineno
+ # Add thread identifiers for logs.
+ attributes[SpanAttributes.THREAD_ID] = record.thread
+ attributes[SpanAttributes.THREAD_NAME] = record.threadName
 
  if record.exc_info:
  exctype, value, tb = record.exc_info
@@ -480,48 +470,8 @@ def _translate(self, record: logging.LogRecord) -> LogRecord:
  observered_timestamp = time_ns()
  span_context = get_current_span().get_span_context()
  attributes = self._get_attributes(record)
- # This comment is taken from GanyedeNil's PR #3343, I have redacted it
- # slightly for clarity:
- # According to the definition of the Body field type in the
- # OTel 1.22.0 Logs Data Model article, the Body field should be of
- # type 'any' and should not use the str method to directly translate
- # the msg. This is because str only converts non-text types into a
- # human-readable form, rather than a standard format, which leads to
- # the need for additional operations when collected through a log
- # collector.
- # Considering that he Body field should be of type 'any' and should not
- # use the str method but record.msg is also a string type, then the
- # difference is just the self.args formatting?
- # The primary consideration depends on the ultimate purpose of the log.
- # Converting the default log directly into a string is acceptable as it
- # will be required to be presented in a more readable format. However,
- # this approach might not be as "standard" when hoping to aggregate
- # logs and perform subsequent data analysis. In the context of log
- # extraction, it would be more appropriate for the msg to be
- # converted into JSON format or remain unchanged, as it will eventually
- # be transformed into JSON. If the final output JSON data contains a
- # structure that appears similar to JSON but is not, it may confuse
- # users. This is particularly true for operation and maintenance
- # personnel who need to deal with log data in various languages.
- # Where is the JSON converting occur? and what about when the msg
- # represents something else but JSON, the expected behavior change?
- # For the ConsoleLogExporter, it performs the to_json operation in
- # opentelemetry.sdk._logs._internal.export.ConsoleLogExporter.__init__,
- # so it can handle any type of input without problems. As for the
- # OTLPLogExporter, it also handles any type of input encoding in
- # _encode_log located in
- # opentelemetry.exporter.otlp.proto.common._internal._log_encoder.
- # Therefore, no extra operation is needed to support this change.
- # The only thing to consider is the users who have already been using
- # this SDK. If they upgrade the SDK after this change, they will need
- # to readjust their logging collection rules to adapt to the latest
- # output format. Therefore, this change is considered a breaking
- # change and needs to be upgraded at an appropriate time.
  severity_number = std_to_otel(record.levelno)
- if isinstance(record.msg, str) and record.args:
- body = record.msg % record.args
- else:
- body = record.msg
+ body = self.format(record)
 
  # related to https://github.com/open-telemetry/opentelemetry-python/issues/3548
  # Severity Text = WARN as defined in https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md#displaying-severity.

@@ -198,8 +198,8 @@ def test_simple_log_record_processor_different_msg_types(self):
  "Temperature hits high 420 C in Hyderabad",
  "CRITICAL",
  ),
- (["list", "of", "strings"], "WARN"),
- ({"key": "value"}, "ERROR"),
+ ("['list', 'of', 'strings']", "WARN"),
+ ("{'key': 'value'}", "ERROR"),
  ]
  emitted = [
  (item.log_record.body, item.log_record.severity_text)

@@ -24,9 +24,11 @@
 from opentelemetry.trace import INVALID_SPAN_CONTEXT
 
 
-def get_logger(level=logging.NOTSET, logger_provider=None):
+def get_logger(level=logging.NOTSET, logger_provider=None, formatter=None):
  logger = logging.getLogger(__name__)
  handler = LoggingHandler(level=level, logger_provider=logger_provider)
+ if formatter:
+ handler.setFormatter(formatter)
  logger.addHandler(handler)
  return logger
 
@@ -139,8 +141,10 @@ def test_log_record_user_attributes(self):
  log_record = args[0]
 
  self.assertIsNotNone(log_record)
- self.assertEqual(len(log_record.attributes), 4)
- self.assertEqual(log_record.attributes["http.status_code"], 200)
+ self.assertEqual(
+ log_record.attributes,
+ {**log_record.attributes, **{"http.status_code": 200}},
+ )
  self.assertTrue(
  log_record.attributes[SpanAttributes.CODE_FILEPATH].endswith(
  "test_handler.py"
@@ -171,7 +175,7 @@ def test_log_record_exception(self):
  log_record = args[0]
 
  self.assertIsNotNone(log_record)
- self.assertEqual(log_record.body, "Zero Division Error")
+ self.assertIn("Zero Division Error", log_record.body)
  self.assertEqual(
  log_record.attributes[SpanAttributes.EXCEPTION_TYPE],
  ZeroDivisionError.__name__,
@@ -235,3 +239,69 @@ def test_log_record_trace_correlation(self):
  self.assertEqual(log_record.trace_id, span_context.trace_id)
  self.assertEqual(log_record.span_id, span_context.span_id)
  self.assertEqual(log_record.trace_flags, span_context.trace_flags)
+
+ def test_log_record_args_are_translated(self):
+ emitter_provider_mock = Mock(spec=LoggerProvider)
+ emitter_mock = APIGetLogger(
+ __name__, logger_provider=emitter_provider_mock
+ )
+ logger = get_logger(logger_provider=emitter_provider_mock)
+
+ with self.assertLogs(level=logging.INFO):
+ logger.info("Test message")
+ args, _ = emitter_mock.emit.call_args_list[0]
+ log_record = args[0]
+
+ self.assertEqual(
+ set(log_record.attributes),
+ {
+ "code.filepath",
+ "code.function",
+ "code.lineno",
+ "created",
+ "filename",
+ "levelname",
+ "levelno",
+ "module",
+ "msecs",
+ "name",
+ "process",
+ "processName",
+ "relativeCreated",
+ "thread.id",
+ "thread.name",
+ },
+ )
+
+ def test_format_is_called(self):
+ emitter_provider_mock = Mock(spec=LoggerProvider)
+ emitter_mock = APIGetLogger(
+ __name__, logger_provider=emitter_provider_mock
+ )
+ formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
+ logger = get_logger(
+ logger_provider=emitter_provider_mock, formatter=formatter
+ )
+
+ with self.assertLogs(level=logging.INFO):
+ logger.info("Test message")
+ args, _ = emitter_mock.emit.call_args_list[0]
+ log_record = args[0]
+
+ self.assertEqual(
+ log_record.body, "tests.logs.test_handler - INFO - Test message"
+ )
+
+ def test_log_body_is_always_string(self):
+ emitter_provider_mock = Mock(spec=LoggerProvider)
+ emitter_mock = APIGetLogger(
+ __name__, logger_provider=emitter_provider_mock
+ )
+ logger = get_logger(logger_provider=emitter_provider_mock)
+
+ with self.assertLogs(level=logging.INFO):
+ logger.info(["something", "of", "note"])
+ args, _ = emitter_mock.emit.call_args_list[0]
+ log_record = args[0]
+
+ self.assertIsInstance(log_record.body, str)