From d73593d1137a3854ceff3d7c94180d2bdb8b097f Mon Sep 17 00:00:00 2001
From: Aaron Abbott <aaronabbott@google.com>
Date: Tue, 28 May 2024 14:35:24 -0400
Subject: [PATCH] Fix prometheus metric name and unit conversion (#3924)

* Fix prometheus metric name and unit conversion

* Apply suggestions from code review

Co-authored-by: Diego Hurtado <ocelotl@users.noreply.github.com>

* Make annotation parsing more permissive, add test case for consecutive underscores

* Add test case for metric name already containing the unit

* simplify and speed up regex and update TODO

* Add OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION opt-out mechanism

* Fix RST typo

---------

Co-authored-by: Diego Hurtado <ocelotl@users.noreply.github.com>
---
 CHANGELOG.md                                  |  12 +
 .../exporter/prometheus/__init__.py           |  53 ++--
 .../exporter/prometheus/_mapping.py           | 137 +++++++++
 .../tests/test_mapping.py                     | 119 ++++++++
 .../tests/test_prometheus_exporter.py         | 284 +++++++++++++++---
 .../sdk/environment_variables.py              |  18 ++
 .../src/opentelemetry/test/metrictestutil.py  |  36 +++
 7 files changed, 598 insertions(+), 61 deletions(-)
 create mode 100644 exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py
 create mode 100644 exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f38f17f13..0a4e061bb6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -42,6 +42,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#3917](https://github.com/open-telemetry/opentelemetry-python/pull/3917/))
 - Add OpenTelemetry trove classifiers to PyPI packages
   ([#3913] (https://github.com/open-telemetry/opentelemetry-python/pull/3913))
+- Fix prometheus metric name and unit conversion
+  ([#3924](https://github.com/open-telemetry/opentelemetry-python/pull/3924))
+  - this is a breaking change to prometheus metric names so they comply with the
+  [specification](https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus).
+  - you can temporarily opt-out of the unit normalization by setting the environment variable
+  `OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION=true`
+  - common unit abbreviations are converted to Prometheus conventions (`s` -> `seconds`),
+  following the [collector's implementation](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/c0b51136575aa7ba89326d18edb4549e7e1bbdb9/pkg/translator/prometheus/normalize_name.go#L108)
+  - repeated `_` are replaced with a single `_`
+  - unit annotations (enclosed in curly braces like `{requests}`) are stripped away
+  - units with slash are converted e.g. `m/s` -> `meters_per_second`.
+  - The exporter's API is not changed
 
 ## Version 1.24.0/0.45b0 (2024-03-28)
 
diff --git a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py
index a84e21214d..7e6b2ecc01 100644
--- a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py
+++ b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py
@@ -67,8 +67,7 @@
 from json import dumps
 from logging import getLogger
 from os import environ
-from re import IGNORECASE, UNICODE, compile
-from typing import Dict, Sequence, Tuple, Union
+from typing import Deque, Dict, Iterable, Sequence, Tuple, Union
 
 from prometheus_client import start_http_server
 from prometheus_client.core import (
@@ -80,9 +79,15 @@
 )
 from prometheus_client.core import Metric as PrometheusMetric
 
+from opentelemetry.exporter.prometheus._mapping import (
+    map_unit,
+    sanitize_attribute,
+    sanitize_full_name,
+)
 from opentelemetry.sdk.environment_variables import (
     OTEL_EXPORTER_PROMETHEUS_HOST,
     OTEL_EXPORTER_PROMETHEUS_PORT,
+    OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION,
 )
 from opentelemetry.sdk.metrics import Counter
 from opentelemetry.sdk.metrics import Histogram as HistogramInstrument
@@ -101,6 +106,7 @@
     MetricsData,
     Sum,
 )
+from opentelemetry.util.types import Attributes
 
 _logger = getLogger(__name__)
 
@@ -164,10 +170,7 @@ class _CustomCollector:
 
     def __init__(self, disable_target_info: bool = False):
         self._callback = None
-        self._metrics_datas = deque()
-        self._non_letters_digits_underscore_re = compile(
-            r"[^\w]", UNICODE | IGNORECASE
-        )
+        self._metrics_datas: Deque[MetricsData] = deque()
         self._disable_target_info = disable_target_info
         self._target_info = None
 
@@ -175,7 +178,7 @@ def add_metrics_data(self, metrics_data: MetricsData) -> None:
         """Add metrics to Prometheus data"""
         self._metrics_datas.append(metrics_data)
 
-    def collect(self) -> None:
+    def collect(self) -> Iterable[PrometheusMetric]:
         """Collect fetches the metrics from OpenTelemetry
         and delivers them as Prometheus Metrics.
         Collect is invoked every time a ``prometheus.Gatherer`` is run
@@ -189,7 +192,7 @@ def collect(self) -> None:
         if len(self._metrics_datas):
             if not self._disable_target_info:
                 if self._target_info is None:
-                    attributes = {}
+                    attributes: Attributes = {}
                     for res in self._metrics_datas[0].resource_metrics:
                         attributes = {**attributes, **res.resource.attributes}
 
@@ -228,17 +231,29 @@ def _translate_to_prometheus(
 
             pre_metric_family_ids = []
 
-            metric_name = ""
-            metric_name += self._sanitize(metric.name)
+            metric_name = sanitize_full_name(metric.name)
 
             metric_description = metric.description or ""
 
+            # TODO(#3929): remove this opt-out option
+            disable_unit_normalization = (
+                environ.get(
+                    OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION,
+                    "false",
+                ).lower()
+                == "true"
+            )
+            if disable_unit_normalization:
+                metric_unit = metric.unit
+            else:
+                metric_unit = map_unit(metric.unit)
+
             for number_data_point in metric.data.data_points:
                 label_keys = []
                 label_values = []
 
                 for key, value in sorted(number_data_point.attributes.items()):
-                    label_keys.append(self._sanitize(key))
+                    label_keys.append(sanitize_attribute(key))
                     label_values.append(self._check_value(value))
 
                 pre_metric_family_ids.append(
@@ -247,7 +262,7 @@ def _translate_to_prometheus(
                             metric_name,
                             metric_description,
                             "%".join(label_keys),
-                            metric.unit,
+                            metric_unit,
                         ]
                     )
                 )
@@ -299,7 +314,7 @@ def _translate_to_prometheus(
                                 name=metric_name,
                                 documentation=metric_description,
                                 labels=label_keys,
-                                unit=metric.unit,
+                                unit=metric_unit,
                             )
                         )
                     metric_family_id_metric_family[
@@ -323,7 +338,7 @@ def _translate_to_prometheus(
                                 name=metric_name,
                                 documentation=metric_description,
                                 labels=label_keys,
-                                unit=metric.unit,
+                                unit=metric_unit,
                             )
                         )
                     metric_family_id_metric_family[
@@ -344,7 +359,7 @@ def _translate_to_prometheus(
                                 name=metric_name,
                                 documentation=metric_description,
                                 labels=label_keys,
-                                unit=metric.unit,
+                                unit=metric_unit,
                             )
                         )
                     metric_family_id_metric_family[
@@ -361,12 +376,6 @@ def _translate_to_prometheus(
                         "Unsupported metric data. %s", type(metric.data)
                     )
 
-    def _sanitize(self, key: str) -> str:
-        """sanitize the given metric name or label according to Prometheus rule.
-        Replace all characters other than [A-Za-z0-9_] with '_'.
-        """
-        return self._non_letters_digits_underscore_re.sub("_", key)
-
     # pylint: disable=no-self-use
     def _check_value(self, value: Union[int, float, str, Sequence]) -> str:
         """Check the label value and return is appropriate representation"""
@@ -380,7 +389,7 @@ def _create_info_metric(
         """Create an Info Metric Family with list of attributes"""
         # sanitize the attribute names according to Prometheus rule
         attributes = {
-            self._sanitize(key): self._check_value(value)
+            sanitize_attribute(key): self._check_value(value)
             for key, value in attributes.items()
         }
         info = InfoMetricFamily(name, description, labels=attributes)
diff --git a/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py
new file mode 100644
index 0000000000..077d2fbb2b
--- /dev/null
+++ b/exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/_mapping.py
@@ -0,0 +1,137 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from re import UNICODE, compile
+
+_SANITIZE_NAME_RE = compile(r"[^a-zA-Z0-9:]+", UNICODE)
+# Same as name, but doesn't allow ":"
+_SANITIZE_ATTRIBUTE_KEY_RE = compile(r"[^a-zA-Z0-9]+", UNICODE)
+
+# UCUM style annotations which are text enclosed in curly braces https://ucum.org/ucum#para-6.
+# This regex is more permissive than UCUM allows and matches any character within curly braces.
+_UNIT_ANNOTATION = compile(r"{.*}")
+
+# Remaps common UCUM and SI units to prometheus conventions. Copied from
+# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L19
+# See specification:
+# https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1
+_UNIT_MAPPINGS = {
+    # Time
+    "d": "days",
+    "h": "hours",
+    "min": "minutes",
+    "s": "seconds",
+    "ms": "milliseconds",
+    "us": "microseconds",
+    "ns": "nanoseconds",
+    # Bytes
+    "By": "bytes",
+    "KiBy": "kibibytes",
+    "MiBy": "mebibytes",
+    "GiBy": "gibibytes",
+    "TiBy": "tibibytes",
+    "KBy": "kilobytes",
+    "MBy": "megabytes",
+    "GBy": "gigabytes",
+    "TBy": "terabytes",
+    # SI
+    "m": "meters",
+    "V": "volts",
+    "A": "amperes",
+    "J": "joules",
+    "W": "watts",
+    "g": "grams",
+    # Misc
+    "Cel": "celsius",
+    "Hz": "hertz",
+    # TODO(https://github.com/open-telemetry/opentelemetry-specification/issues/4058): the
+    # specification says to normalize "1" to ratio but that may change. Update this mapping or
+    # remove TODO once a decision is made.
+    "1": "",
+    "%": "percent",
+}
+# Similar to _UNIT_MAPPINGS, but for "per" unit denominator.
+# Example: s => per second (singular)
+# Copied from https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/80317ce83ed87a2dff0c316bb939afbfaa823d5e/pkg/translator/prometheus/normalize_name.go#L58
+_PER_UNIT_MAPPINGS = {
+    "s": "second",
+    "m": "minute",
+    "h": "hour",
+    "d": "day",
+    "w": "week",
+    "mo": "month",
+    "y": "year",
+}
+
+
+def sanitize_full_name(name: str) -> str:
+    """sanitize the given metric name according to Prometheus rule, including sanitizing
+    leading digits
+
+    https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1
+    """
+    # Leading number special case
+    if name and name[0].isdigit():
+        name = "_" + name[1:]
+    return _sanitize_name(name)
+
+
+def _sanitize_name(name: str) -> str:
+    """sanitize the given metric name according to Prometheus rule, but does not handle
+    sanitizing a leading digit."""
+    return _SANITIZE_NAME_RE.sub("_", name)
+
+
+def sanitize_attribute(key: str) -> str:
+    """sanitize the given metric attribute key according to Prometheus rule.
+
+    https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-attributes
+    """
+    # Leading number special case
+    if key and key[0].isdigit():
+        key = "_" + key[1:]
+    return _SANITIZE_ATTRIBUTE_KEY_RE.sub("_", key)
+
+
+def map_unit(unit: str) -> str:
+    """Maps unit to common prometheus metric names if available and sanitizes any invalid
+    characters
+
+    See:
+    - https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1
+    - https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L108
+    """
+    # remove curly brace unit annotations
+    unit = _UNIT_ANNOTATION.sub("", unit)
+
+    if unit in _UNIT_MAPPINGS:
+        return _UNIT_MAPPINGS[unit]
+
+    # replace "/" with "per" units like m/s -> meters_per_second
+    ratio_unit_subparts = unit.split("/", maxsplit=1)
+    if len(ratio_unit_subparts) == 2:
+        bottom = _sanitize_name(ratio_unit_subparts[1])
+        if bottom:
+            top = _sanitize_name(ratio_unit_subparts[0])
+            top = _UNIT_MAPPINGS.get(top, top)
+            bottom = _PER_UNIT_MAPPINGS.get(bottom, bottom)
+            return f"{top}_per_{bottom}" if top else f"per_{bottom}"
+
+    return (
+        # since units end up as a metric name suffix, they must be sanitized
+        _sanitize_name(unit)
+        # strip surrounding "_" chars since it will lead to consecutive underscores in the
+        # metric name
+        .strip("_")
+    )
diff --git a/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py b/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py
new file mode 100644
index 0000000000..f2641de17a
--- /dev/null
+++ b/exporter/opentelemetry-exporter-prometheus/tests/test_mapping.py
@@ -0,0 +1,119 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import TestCase
+
+from opentelemetry.exporter.prometheus._mapping import (
+    map_unit,
+    sanitize_attribute,
+    sanitize_full_name,
+)
+
+
+class TestMapping(TestCase):
+    def test_sanitize_full_name(self):
+        self.assertEqual(
+            sanitize_full_name("valid_metric_name"), "valid_metric_name"
+        )
+        self.assertEqual(
+            sanitize_full_name("VALID_METRIC_NAME"), "VALID_METRIC_NAME"
+        )
+        self.assertEqual(
+            sanitize_full_name("_valid_metric_name"), "_valid_metric_name"
+        )
+        self.assertEqual(
+            sanitize_full_name("valid:metric_name"), "valid:metric_name"
+        )
+        self.assertEqual(
+            sanitize_full_name("valid_1_metric_name"), "valid_1_metric_name"
+        )
+        self.assertEqual(
+            sanitize_full_name("1leading_digit"), "_leading_digit"
+        )
+        self.assertEqual(
+            sanitize_full_name("consective_____underscores"),
+            "consective_underscores",
+        )
+        self.assertEqual(
+            sanitize_full_name("1_~#consective_underscores"),
+            "_consective_underscores",
+        )
+        self.assertEqual(
+            sanitize_full_name("1!2@3#4$5%6^7&8*9(0)_-"),
+            "_2_3_4_5_6_7_8_9_0_",
+        )
+        self.assertEqual(sanitize_full_name("foo,./?;:[]{}bar"), "foo_:_bar")
+        self.assertEqual(sanitize_full_name("TestString"), "TestString")
+        self.assertEqual(sanitize_full_name("aAbBcC_12_oi"), "aAbBcC_12_oi")
+
+    def test_sanitize_attribute(self):
+        self.assertEqual(
+            sanitize_attribute("valid_attr_key"), "valid_attr_key"
+        )
+        self.assertEqual(
+            sanitize_attribute("VALID_attr_key"), "VALID_attr_key"
+        )
+        self.assertEqual(
+            sanitize_attribute("_valid_attr_key"), "_valid_attr_key"
+        )
+        self.assertEqual(
+            sanitize_attribute("valid_1_attr_key"), "valid_1_attr_key"
+        )
+        self.assertEqual(
+            sanitize_attribute("sanitize:colons"), "sanitize_colons"
+        )
+        self.assertEqual(
+            sanitize_attribute("1leading_digit"), "_leading_digit"
+        )
+        self.assertEqual(
+            sanitize_attribute("1_~#consective_underscores"),
+            "_consective_underscores",
+        )
+        self.assertEqual(
+            sanitize_attribute("1!2@3#4$5%6^7&8*9(0)_-"),
+            "_2_3_4_5_6_7_8_9_0_",
+        )
+        self.assertEqual(sanitize_attribute("foo,./?;:[]{}bar"), "foo_bar")
+        self.assertEqual(sanitize_attribute("TestString"), "TestString")
+        self.assertEqual(sanitize_attribute("aAbBcC_12_oi"), "aAbBcC_12_oi")
+
+    def test_map_unit(self):
+        # select hardcoded mappings
+        self.assertEqual(map_unit("s"), "seconds")
+        self.assertEqual(map_unit("By"), "bytes")
+        self.assertEqual(map_unit("m"), "meters")
+        # should work with UCUM annotations as well
+        self.assertEqual(map_unit("g{dogfood}"), "grams")
+
+        # UCUM "default unit" aka unity and equivalent UCUM annotations should be stripped
+        self.assertEqual(map_unit("1"), "")
+        self.assertEqual(map_unit("{}"), "")
+        self.assertEqual(map_unit("{request}"), "")
+        self.assertEqual(map_unit("{{{;@#$}}}"), "")
+        self.assertEqual(map_unit("{unit with space}"), "")
+
+        # conversion of per units
+        self.assertEqual(map_unit("km/h"), "km_per_hour")
+        self.assertEqual(map_unit("m/s"), "meters_per_second")
+        self.assertEqual(map_unit("{foo}/s"), "per_second")
+        self.assertEqual(map_unit("foo/bar"), "foo_per_bar")
+        self.assertEqual(map_unit("2fer/store"), "2fer_per_store")
+
+        # should be sanitized to become part of the metric name without surrounding "_"
+        self.assertEqual(map_unit("____"), "")
+        self.assertEqual(map_unit("____"), "")
+        self.assertEqual(map_unit("1:foo#@!"), "1:foo")
+        # should not be interpreted as a per unit since there is no denominator
+        self.assertEqual(map_unit("m/"), "m")
+        self.assertEqual(map_unit("m/{bar}"), "m")
diff --git a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py
index 7f0cbaaaee..ed11bb9ab7 100644
--- a/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py
+++ b/exporter/opentelemetry-exporter-prometheus/tests/test_prometheus_exporter.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from os import environ
 from textwrap import dedent
 from unittest import TestCase
 from unittest.mock import Mock, patch
@@ -27,6 +28,9 @@
     PrometheusMetricReader,
     _CustomCollector,
 )
+from opentelemetry.sdk.environment_variables import (
+    OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION,
+)
 from opentelemetry.sdk.metrics import MeterProvider
 from opentelemetry.sdk.metrics.export import (
     AggregationTemporality,
@@ -40,6 +44,7 @@
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.test.metrictestutil import (
     _generate_gauge,
+    _generate_histogram,
     _generate_sum,
     _generate_unsupported_metric,
 )
@@ -53,6 +58,31 @@ def setUp(self):
             side_effect=self._mock_registry_register,
         )
 
+    def verify_text_format(
+        self, metric: Metric, expect_prometheus_text: str
+    ) -> None:
+        metrics_data = MetricsData(
+            resource_metrics=[
+                ResourceMetrics(
+                    resource=Mock(),
+                    scope_metrics=[
+                        ScopeMetrics(
+                            scope=Mock(),
+                            metrics=[metric],
+                            schema_url="schema_url",
+                        )
+                    ],
+                    schema_url="schema_url",
+                )
+            ]
+        )
+
+        collector = _CustomCollector(disable_target_info=True)
+        collector.add_metrics_data(metrics_data)
+        result_bytes = generate_latest(collector)
+        result = result_bytes.decode("utf-8")
+        self.assertEqual(result, expect_prometheus_text)
+
     # pylint: disable=protected-access
     def test_constructor(self):
         """Test the constructor."""
@@ -90,37 +120,17 @@ def test_histogram_to_prometheus(self):
                 aggregation_temporality=AggregationTemporality.DELTA,
             ),
         )
-        metrics_data = MetricsData(
-            resource_metrics=[
-                ResourceMetrics(
-                    resource=Mock(),
-                    scope_metrics=[
-                        ScopeMetrics(
-                            scope=Mock(),
-                            metrics=[metric],
-                            schema_url="schema_url",
-                        )
-                    ],
-                    schema_url="schema_url",
-                )
-            ]
-        )
-
-        collector = _CustomCollector(disable_target_info=True)
-        collector.add_metrics_data(metrics_data)
-        result_bytes = generate_latest(collector)
-        result = result_bytes.decode("utf-8")
-        self.assertEqual(
-            result,
+        self.verify_text_format(
+            metric,
             dedent(
                 """\
-                # HELP test_name_s foo
-                # TYPE test_name_s histogram
-                test_name_s_bucket{histo="1",le="123.0"} 1.0
-                test_name_s_bucket{histo="1",le="456.0"} 4.0
-                test_name_s_bucket{histo="1",le="+Inf"} 6.0
-                test_name_s_count{histo="1"} 6.0
-                test_name_s_sum{histo="1"} 579.0
+                # HELP test_name_seconds foo
+                # TYPE test_name_seconds histogram
+                test_name_seconds_bucket{histo="1",le="123.0"} 1.0
+                test_name_seconds_bucket{histo="1",le="456.0"} 4.0
+                test_name_seconds_bucket{histo="1",le="+Inf"} 6.0
+                test_name_seconds_count{histo="1"} 6.0
+                test_name_seconds_sum{histo="1"} 579.0
                 """
             ),
         )
@@ -270,16 +280,6 @@ def test_invalid_metric(self):
         collector.collect()
         self.assertLogs("opentelemetry.exporter.prometheus", level="WARNING")
 
-    def test_sanitize(self):
-        collector = _CustomCollector()
-        self.assertEqual(
-            collector._sanitize("1!2@3#4$5%6^7&8*9(0)_-"),
-            "1_2_3_4_5_6_7_8_9_0___",
-        )
-        self.assertEqual(collector._sanitize(",./?;:[]{}"), "__________")
-        self.assertEqual(collector._sanitize("TestString"), "TestString")
-        self.assertEqual(collector._sanitize("aAbBcC_12_oi"), "aAbBcC_12_oi")
-
     def test_list_labels(self):
         labels = {"environment@": ["1", "2", "3"], "os": "Unix"}
         metric = _generate_gauge(
@@ -457,3 +457,209 @@ def test_label_order_does_not_matter(self):
         # Only one metric is generated
         metric_count = prometheus_output.count("# HELP counter_total")
         self.assertEqual(metric_count, 1)
+
+    def test_metric_name(self):
+        self.verify_text_format(
+            _generate_sum(name="test_counter", value=1, unit=""),
+            dedent(
+                """\
+                # HELP test_counter_total foo
+                # TYPE test_counter_total counter
+                test_counter_total{a="1",b="true"} 1.0
+                """
+            ),
+        )
+        self.verify_text_format(
+            _generate_sum(name="1leading_digit", value=1, unit=""),
+            dedent(
+                """\
+                # HELP _leading_digit_total foo
+                # TYPE _leading_digit_total counter
+                _leading_digit_total{a="1",b="true"} 1.0
+                """
+            ),
+        )
+        self.verify_text_format(
+            _generate_sum(name="!@#counter_invalid_chars", value=1, unit=""),
+            dedent(
+                """\
+                # HELP _counter_invalid_chars_total foo
+                # TYPE _counter_invalid_chars_total counter
+                _counter_invalid_chars_total{a="1",b="true"} 1.0
+                """
+            ),
+        )
+
+    def test_metric_name_with_unit(self):
+        self.verify_text_format(
+            _generate_gauge(name="test.metric.no_unit", value=1, unit=""),
+            dedent(
+                """\
+                # HELP test_metric_no_unit foo
+                # TYPE test_metric_no_unit gauge
+                test_metric_no_unit{a="1",b="true"} 1.0
+                """
+            ),
+        )
+        self.verify_text_format(
+            _generate_gauge(
+                name="test.metric.spaces", value=1, unit="   \t  "
+            ),
+            dedent(
+                """\
+                # HELP test_metric_spaces foo
+                # TYPE test_metric_spaces gauge
+                test_metric_spaces{a="1",b="true"} 1.0
+                """
+            ),
+        )
+
+        # UCUM annotations should be stripped
+        self.verify_text_format(
+            _generate_sum(name="test_counter", value=1, unit="{requests}"),
+            dedent(
+                """\
+                # HELP test_counter_total foo
+                # TYPE test_counter_total counter
+                test_counter_total{a="1",b="true"} 1.0
+                """
+            ),
+        )
+
+        # slash converts to "per"
+        self.verify_text_format(
+            _generate_gauge(name="test_gauge", value=1, unit="m/s"),
+            dedent(
+                """\
+                # HELP test_gauge_meters_per_second foo
+                # TYPE test_gauge_meters_per_second gauge
+                test_gauge_meters_per_second{a="1",b="true"} 1.0
+                """
+            ),
+        )
+
+        # invalid characters in name are sanitized before being passed to prom client, which
+        # would throw errors
+        self.verify_text_format(
+            _generate_sum(name="test_counter", value=1, unit="%{foo}@?"),
+            dedent(
+                """\
+                # HELP test_counter_total foo
+                # TYPE test_counter_total counter
+                test_counter_total{a="1",b="true"} 1.0
+                """
+            ),
+        )
+
+    # TODO(#3929): remove this opt-out option
+    @patch.dict(
+        environ,
+        {
+            OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION: "true"
+        },
+    )
+    def test_metric_name_with_unit_normalization_disabled(self):
+        self.verify_text_format(
+            _generate_sum(name="test_unit_not_normalized", value=1, unit="s"),
+            dedent(
+                """\
+                # HELP test_unit_not_normalized_s_total foo
+                # TYPE test_unit_not_normalized_s_total counter
+                test_unit_not_normalized_s_total{a="1",b="true"} 1.0
+                """
+            ),
+        )
+
+    def test_semconv(self):
+        """Tests that a few select semconv metrics get converted to the expected prometheus
+        text format"""
+        self.verify_text_format(
+            _generate_sum(
+                name="system.filesystem.usage",
+                value=1,
+                is_monotonic=False,
+                unit="By",
+            ),
+            dedent(
+                """\
+                # HELP system_filesystem_usage_bytes foo
+                # TYPE system_filesystem_usage_bytes gauge
+                system_filesystem_usage_bytes{a="1",b="true"} 1.0
+                """
+            ),
+        )
+        self.verify_text_format(
+            _generate_sum(
+                name="system.network.dropped",
+                value=1,
+                unit="{packets}",
+            ),
+            dedent(
+                """\
+                # HELP system_network_dropped_total foo
+                # TYPE system_network_dropped_total counter
+                system_network_dropped_total{a="1",b="true"} 1.0
+                """
+            ),
+        )
+        self.verify_text_format(
+            _generate_histogram(
+                name="http.server.request.duration",
+                unit="s",
+            ),
+            dedent(
+                """\
+                # HELP http_server_request_duration_seconds foo
+                # TYPE http_server_request_duration_seconds histogram
+                http_server_request_duration_seconds_bucket{a="1",b="true",le="123.0"} 1.0
+                http_server_request_duration_seconds_bucket{a="1",b="true",le="456.0"} 4.0
+                http_server_request_duration_seconds_bucket{a="1",b="true",le="+Inf"} 6.0
+                http_server_request_duration_seconds_count{a="1",b="true"} 6.0
+                http_server_request_duration_seconds_sum{a="1",b="true"} 579.0
+                """
+            ),
+        )
+        self.verify_text_format(
+            _generate_sum(
+                name="http.server.active_requests",
+                value=1,
+                unit="{request}",
+                is_monotonic=False,
+            ),
+            dedent(
+                """\
+                # HELP http_server_active_requests foo
+                # TYPE http_server_active_requests gauge
+                http_server_active_requests{a="1",b="true"} 1.0
+                """
+            ),
+        )
+        # if the metric name already contains the unit, it shouldn't be added again
+        self.verify_text_format(
+            _generate_sum(
+                name="metric_name_with_myunit",
+                value=1,
+                unit="myunit",
+            ),
+            dedent(
+                """\
+                # HELP metric_name_with_myunit_total foo
+                # TYPE metric_name_with_myunit_total counter
+                metric_name_with_myunit_total{a="1",b="true"} 1.0
+                """
+            ),
+        )
+        self.verify_text_format(
+            _generate_gauge(
+                name="metric_name_percent",
+                value=1,
+                unit="%",
+            ),
+            dedent(
+                """\
+                # HELP metric_name_percent foo
+                # TYPE metric_name_percent gauge
+                metric_name_percent{a="1",b="true"} 1.0
+                """
+            ),
+        )
diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py
index 6557d2a139..99899539a1 100644
--- a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py
+++ b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables.py
@@ -710,3 +710,21 @@
 This is an experimental environment variable and the name of this variable and its behavior can
 change in a non-backwards compatible way.
 """
+
+
+# TODO(#3929): remove this opt-out option
+OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION = (
+    "OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION"
+)
+"""
+.. envvar:: OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION
+
+The :envvar:`OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION` environment
+variable allows you to opt-out of the unit normalization described `in the specification
+<https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1>`_.
+
+Default: False
+
+This is an temporary environment variable that provides backward compatibility but will be
+removed in the future https://github.com/open-telemetry/opentelemetry-python/issues/3929.
+"""
diff --git a/tests/opentelemetry-test-utils/src/opentelemetry/test/metrictestutil.py b/tests/opentelemetry-test-utils/src/opentelemetry/test/metrictestutil.py
index ff25b092a6..08354bf0de 100644
--- a/tests/opentelemetry-test-utils/src/opentelemetry/test/metrictestutil.py
+++ b/tests/opentelemetry-test-utils/src/opentelemetry/test/metrictestutil.py
@@ -13,14 +13,19 @@
 # limitations under the License.
 
 
+from typing import Optional
+
 from opentelemetry.attributes import BoundedAttributes
 from opentelemetry.sdk.metrics.export import (
     AggregationTemporality,
     Gauge,
+    Histogram,
+    HistogramDataPoint,
     Metric,
     NumberDataPoint,
     Sum,
 )
+from opentelemetry.util.types import Attributes
 
 
 def _generate_metric(
@@ -98,3 +103,34 @@ def _generate_unsupported_metric(
         description=description,
         unit=unit,
     )
+
+
+def _generate_histogram(
+    name: str,
+    attributes: Attributes = None,
+    description: Optional[str] = None,
+    unit: Optional[str] = None,
+) -> Metric:
+    if attributes is None:
+        attributes = BoundedAttributes(attributes={"a": 1, "b": True})
+    return _generate_metric(
+        name,
+        Histogram(
+            data_points=[
+                HistogramDataPoint(
+                    attributes=attributes,
+                    start_time_unix_nano=1641946016139533244,
+                    time_unix_nano=1641946016139533244,
+                    count=6,
+                    sum=579.0,
+                    bucket_counts=[1, 3, 2],
+                    explicit_bounds=[123.0, 456.0],
+                    min=1,
+                    max=457,
+                )
+            ],
+            aggregation_temporality=AggregationTemporality.CUMULATIVE,
+        ),
+        description=description,
+        unit=unit,
+    )