Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix: use device in all Torch models #5026

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fiftyone/utils/clip/zoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def _predict_all(self, imgs):
frame_size = (width, height)

if self._using_gpu:
imgs = imgs.cuda()
imgs = imgs.to(self.device)

text_features = self._get_text_features()
image_features = self._model.encode_image(imgs)
Expand Down
10 changes: 6 additions & 4 deletions fiftyone/utils/open_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def _get_text_features(self):
# Tokenize text
text = self._tokenizer(prompts)
if self._using_gpu:
text = text.cuda()
text = text.to(self.device)
self._text_features = self._model.encode_text(text)

return self._text_features
Expand All @@ -118,7 +118,7 @@ def _embed_prompts(self, prompts):
# Tokenize text
text = self._tokenizer(formatted_prompts)
if self._using_gpu:
text = text.cuda()
text = text.to(self.device)
return self._model.encode_text(text)

def _get_class_logits(self, text_features, image_features):
Expand All @@ -143,9 +143,11 @@ def _predict_all(self, imgs):
frame_size = (width, height)

if self._using_gpu:
imgs = imgs.cuda()
imgs = imgs.to(self.device)
Copy link
Contributor

@coderabbitai coderabbitai bot Nov 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Consider adding error handling for device transfers.

While the device handling change is correct, the subsequent autocast context is hardcoded to "cuda". This might cause issues when running on CPU.

Apply this diff to make it device-aware:

         if self._using_gpu:
             imgs = imgs.to(self.device)
 
-        with torch.no_grad(), torch.amp.autocast("cuda"):
+        with torch.no_grad(), torch.amp.autocast(device_type=self.device.type if self._using_gpu else "cpu"):
             image_features = self._model.encode_image(imgs)
             text_features = self._get_text_features()

Also applies to: 147-152

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jacobsela coderabbit raises an interesting point here. Does torch.amp.autocast("cuda") need to be updated?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is valid and will cause problems if not handled. It's on my todo list for this week to review the code more thoroughly before moving further with this PR, because this comment suggests there are probably more places I haven't noticed that make hardware assumptions.


with torch.no_grad(), torch.amp.autocast("cuda"):
with torch.no_grad(), torch.amp.autocast(
device_type=self.device.type if self._using_gpu else "cpu"
):
image_features = self._model.encode_image(imgs)
text_features = self._get_text_features()

Expand Down
2 changes: 1 addition & 1 deletion fiftyone/utils/super_gradients.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _load_model(self, config):
)

if self._using_gpu:
model = model.cuda()
model = model.to(self.device)

return model

Expand Down
42 changes: 22 additions & 20 deletions fiftyone/utils/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ class FiftyOneTransformerConfig(Config, HasZooModel):
def __init__(self, d):
self.model = self.parse_raw(d, "model", default=None)
self.name_or_path = self.parse_string(d, "name_or_path", default=None)
self.device = self.parse_string(
d, "device", default="cuda" if torch.cuda.is_available() else "cpu"
)
if etau.is_str(self.model):
self.name_or_path = self.model
self.model = None
Expand Down Expand Up @@ -451,7 +454,8 @@ class FiftyOneTransformer(TransformerEmbeddingsMixin, Model):
def __init__(self, config):
self.config = config
self.model = self._load_model(config)
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.device = torch.device(self.config.device)
self.model.to(self.device)
self.image_processor = self._load_image_processor()

@property
Expand Down Expand Up @@ -496,7 +500,8 @@ def __init__(self, config):
self.config = config
self.classes = config.classes
self.model = self._load_model(config)
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.device = torch.device(self.config.device)
self.model.to(self.device)
jacobsela marked this conversation as resolved.
Show resolved Hide resolved
self.processor = self._load_processor()
self._text_prompts = None

Expand Down Expand Up @@ -581,7 +586,7 @@ def _load_model(self, config):
if config.model is not None:
return config.model

device = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(config.device)
model = transformers.AutoModel.from_pretrained(config.name_or_path).to(
device
)
Expand Down Expand Up @@ -641,7 +646,7 @@ def _predict_from_retrieval(self, arg):
with torch.no_grad():
for text_prompt in text_prompts:
inputs = self.processor(arg, text_prompt, return_tensors="pt")
outputs = self.model(**inputs.to(self.device))
outputs = self.model(**(inputs.to(self.device)))
logits.append(outputs.logits[0, :].item())

logits = np.array(logits)
Expand Down Expand Up @@ -693,14 +698,14 @@ class FiftyOneTransformerForImageClassification(FiftyOneTransformer):
def _load_model(self, config):
if config.model is not None:
return config.model
device = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(config.device)
return transformers.AutoModelForImageClassification.from_pretrained(
config.name_or_path
).to(device)

def _predict(self, inputs):
with torch.no_grad():
results = self.model(**inputs.to(self.device))
results = self.model(**(inputs.to(self.device)))
return to_classification(results, self.model.config.id2label)

def predict(self, arg):
Expand Down Expand Up @@ -748,7 +753,8 @@ def __init__(self, config):
self.classes = config.classes
self.processor = self._load_processor(config)
self.model = self._load_model(config)
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.device = torch.device(self.config.device)
self.model.to(self.device)
self._text_prompts = None

def _load_processor(self, config):
Expand All @@ -757,9 +763,7 @@ def _load_processor(self, config):
if config.model is not None:
name_or_path = config.model.name_or_path

return transformers.AutoProcessor.from_pretrained(name_or_path).to(
self.device
)
return transformers.AutoProcessor.from_pretrained(name_or_path)

def _load_model(self, config):
name_or_path = config.name_or_path
Expand All @@ -770,7 +774,9 @@ def _load_model(self, config):
if config.model is not None:
return config.model
else:
return _get_detector_from_processor(self.processor, name_or_path)
return _get_detector_from_processor(
self.processor, name_or_path
).to(config.device)

def _process_inputs(self, args):
text_prompts = self._get_text_prompts()
Expand All @@ -781,7 +787,7 @@ def _process_inputs(self, args):

def _predict(self, inputs, target_sizes):
with torch.no_grad():
outputs = self.model(**inputs.to(self.device))
outputs = self.model(**(inputs.to(self.device)))

results = self.processor.image_processor.post_process_object_detection(
outputs, target_sizes=target_sizes
Expand Down Expand Up @@ -821,10 +827,9 @@ class FiftyOneTransformerForObjectDetection(FiftyOneTransformer):
def _load_model(self, config):
if config.model is not None:
return config.model
device = "cuda" if torch.cuda.is_available() else "cpu"
return transformers.AutoModelForObjectDetection.from_pretrained(
config.name_or_path
).to(device)
).to(config.device)

def _predict(self, inputs, target_sizes):
with torch.no_grad():
Expand Down Expand Up @@ -875,11 +880,10 @@ def _load_model(self, config):
if config.model is not None:
model = config.model
else:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = (
transformers.AutoModelForSemanticSegmentation.from_pretrained(
config.name_or_path
).to(device)
).to(config.device)
)

self.mask_targets = model.config.id2label
Expand Down Expand Up @@ -929,10 +933,9 @@ class FiftyOneTransformerForDepthEstimation(FiftyOneTransformer):
def _load_model(self, config):
if config.model is not None:
return config.model
device = "cuda" if torch.cuda.is_available() else "cpu"
return transformers.AutoModelForDepthEstimation.from_pretrained(
config.name_or_path
).to(device)
).to(config.device)

def _predict(self, inputs, target_sizes):
with torch.no_grad():
Expand Down Expand Up @@ -1084,5 +1087,4 @@ def _get_detector_from_processor(processor, model_name_or_path):
__import__(module_name, fromlist=[detector_class_name]),
detector_class_name,
)
device = "cuda" if torch.cuda.is_available() else "cpu"
return detector_class.from_pretrained(model_name_or_path).to(device)
return detector_class.from_pretrained(model_name_or_path)
6 changes: 6 additions & 0 deletions fiftyone/utils/ultralytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import fiftyone.zoo.models as fozm

ultralytics = fou.lazy_import("ultralytics")
torch = fou.lazy_import("torch")
brimoor marked this conversation as resolved.
Show resolved Hide resolved


def convert_ultralytics_model(model):
Expand Down Expand Up @@ -378,6 +379,9 @@ def __init__(self, d):
self.model_name = self.parse_raw(d, "model_name", default=None)
self.model_path = self.parse_raw(d, "model_path", default=None)
self.classes = self.parse_array(d, "classes", default=None)
self.device = self.parse_string(
d, "device", default="cuda" if torch.cuda.is_available() else "cpu"
)


class FiftyOneYOLOModel(Model):
Expand All @@ -390,6 +394,8 @@ class FiftyOneYOLOModel(Model):
def __init__(self, config):
self.config = config
self.model = self._load_model(config)
self.device = torch.device(config.device)
self.model.to(self.device)

def _load_model(self, config):
if config.model is not None:
Expand Down
15 changes: 10 additions & 5 deletions fiftyone/zoo/models/manifest-torch.json
Original file line number Diff line number Diff line change
Expand Up @@ -2968,7 +2968,8 @@
"entrypoint_args": {
"repo_or_dir": "ultralytics/yolov5",
"model": "yolov5n",
"pretrained": true
"pretrained": true,
"device": "cpu"
},
"output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor",
"raw_inputs": true
Expand Down Expand Up @@ -2998,7 +2999,8 @@
"entrypoint_args": {
"repo_or_dir": "ultralytics/yolov5",
"model": "yolov5s",
"pretrained": true
"pretrained": true,
"device": "cpu"
},
"output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor",
"raw_inputs": true
Expand Down Expand Up @@ -3028,7 +3030,8 @@
"entrypoint_args": {
"repo_or_dir": "ultralytics/yolov5",
"model": "yolov5m",
"pretrained": true
"pretrained": true,
"device": "cpu"
},
"output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor",
"raw_inputs": true
Expand Down Expand Up @@ -3058,7 +3061,8 @@
"entrypoint_args": {
"repo_or_dir": "ultralytics/yolov5",
"model": "yolov5l",
"pretrained": true
"pretrained": true,
"device": "cpu"
},
"output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor",
"raw_inputs": true
Expand Down Expand Up @@ -4555,7 +4559,8 @@
"entrypoint_args": {
"repo_or_dir": "ultralytics/yolov5",
"model": "yolov5x",
"pretrained": true
"pretrained": true,
"device": "cpu"
},
"output_processor_cls": "fiftyone.utils.ultralytics.UltralyticsOutputProcessor",
"raw_inputs": true
Expand Down
Loading