Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate transcoding as subtitle #2647

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/backend/marsha/core/api/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -1226,7 +1226,7 @@ def initiate_transcript(self, request, pk=None):
timed_text_track = TimedTextTrack.objects.create(
video=video,
language=settings.LANGUAGES[0][0],
mode=TimedTextTrack.TRANSCRIPT,
mode=TimedTextTrack.SUBTITLE,
upload_state=defaults.PROCESSING,
)
except IntegrityError:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Management command to delete transcoding temp files."""

import logging

from django.core.management import BaseCommand

from marsha.core.utils.transcode import delete_transcoding_temp_files


logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command that deletes the transcoding temp files."""

    help = "Delete transcoding temp files"

    def handle(self, *args, **options):
        """
        Run the cleanup.

        All the work is delegated to `delete_transcoding_temp_files`; the
        positional arguments and options received by the command are not
        read.
        """
        # The helper takes no argument, so nothing is forwarded from
        # `args` / `options`.
        delete_transcoding_temp_files()
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def test_api_video_initiate_transcript_token_user(self):
mock_dispatch_video.assert_called_once_with(video)

self.assertEqual(timed_text_track.language, "en")
self.assertEqual(timed_text_track.mode, models.TimedTextTrack.TRANSCRIPT)
self.assertEqual(timed_text_track.mode, models.TimedTextTrack.SUBTITLE)
self.assertEqual(timed_text_track.upload_state, defaults.PROCESSING)

def test_api_video_initiate_transcript_staff_or_user(self):
Expand Down Expand Up @@ -136,7 +136,7 @@ def test_api_video_initiate_transcript_existing_timed_text_track(self):
factories.TimedTextTrackFactory(
video=video,
language="en",
mode=models.TimedTextTrack.TRANSCRIPT,
mode=models.TimedTextTrack.SUBTITLE,
upload_state=defaults.READY,
)
jwt_token = InstructorOrAdminLtiTokenFactory(playlist=video.playlist)
Expand Down Expand Up @@ -204,5 +204,5 @@ def test_api_video_initiate_transcript_aws_pipeline(self):
mock_dispatch_video.assert_called_once_with(video)

self.assertEqual(timed_text_track.language, "en")
self.assertEqual(timed_text_track.mode, models.TimedTextTrack.TRANSCRIPT)
self.assertEqual(timed_text_track.mode, models.TimedTextTrack.SUBTITLE)
self.assertEqual(timed_text_track.upload_state, defaults.PROCESSING)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Test delete_transcoding_temp_files command."""

from unittest.mock import patch

from django.core.management import call_command
from django.test import TestCase

from marsha.core.utils import transcode


@patch.object(transcode, "delete_transcoding_temp_files")
class TranscriptVideosTestCase(TestCase):
    """
    Test case for the delete_transcoding_temp_files command.

    The class-level patch replaces `transcode.delete_transcoding_temp_files`
    with a mock, which is handed to each test method as an extra argument.
    """

    # NOTE(review): the patch targets the attribute on the `transcode` module.
    # If the command module binds the function with `from ... import`, the
    # mock is only seen when that import happens while the patch is active -
    # confirm this holds regardless of test ordering.

    maxDiff = None

    def test_delete_transcoding_temp_files(self, mock_delete_temp_files):
        """Running the command should call the cleanup function exactly once."""
        command_name = "delete_transcoding_temp_files"

        call_command(command_name)

        mock_delete_temp_files.assert_called_once()
62 changes: 50 additions & 12 deletions src/backend/marsha/core/tests/utils/test_transcode.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
"""Tests for the `core.utils.transcode` module."""

from unittest.mock import patch
from uuid import uuid4

from django.test import TestCase

from django_peertube_runner_connector.factories import (
VideoFactory as TranscodedVideoFactory,
)
from django_peertube_runner_connector.models import (
Video as TranscodedVideo,
VideoFile,
Expand All @@ -12,11 +18,14 @@
from marsha.core import defaults
from marsha.core.factories import VideoFactory
from marsha.core.utils.time_utils import to_datetime
from marsha.core.utils.transcode import transcoding_ended_callback
from marsha.core.utils.transcode import (
delete_transcoding_temp_files,
transcoding_ended_callback,
)


class TranscodeTestCase(TestCase):
"""Test the `transcode` functions."""
class TranscodingEndedCallbackTestCase(TestCase):
"""Test the `transcoding_ended_callback` function."""

def setUp(self):
# Create a test video
Expand All @@ -31,6 +40,7 @@ def setUp(self):
state=VideoState.PUBLISHED,
directory=f"vod/{self.video.pk}/video/1698941501",
)

self.video_playlist = VideoStreamingPlaylist.objects.create(
video=self.transcoded_video
)
Expand All @@ -50,31 +60,59 @@ def setUp(self):
extname="mp4",
)

def test_transcoding_ended_callback(self):
@patch("marsha.core.utils.transcode.delete_temp_file")
def test_transcoding_ended_callback(self, mock_delete_temp_file):
"""The marsha video should correctly be updated."""
transcoding_ended_callback(self.transcoded_video)

self.video.refresh_from_db()

self.assertEqual(self.video.uploaded_on, to_datetime("1698941501"))

self.assertEqual(self.video.resolutions, [720, 1080])

self.assertEqual(self.video.upload_state, defaults.READY)
self.assertEqual(self.video.transcode_pipeline, defaults.PEERTUBE_PIPELINE)
mock_delete_temp_file.assert_called_once_with(
self.transcoded_video, f"tmp/{self.video.pk}/video/1698941501"
)

def test_transcoding_ended_callback_with_error(self):
@patch("marsha.core.utils.transcode.delete_temp_file")
def test_transcoding_ended_callback_with_error(self, mock_delete_temp_file):
"""The marsha video should be set with state on error and nothing else should be done."""
self.transcoded_video.state = VideoState.TRANSCODING_FAILED

transcoding_ended_callback(self.transcoded_video)

self.video.refresh_from_db()

self.assertEqual(self.video.upload_state, defaults.ERROR)

self.assertEqual(self.video.uploaded_on, None)

self.assertEqual(self.video.resolutions, [])

self.assertEqual(self.video.transcode_pipeline, None)
mock_delete_temp_file.assert_called_once_with(
self.transcoded_video, f"tmp/{self.video.pk}/video/1698941501"
)


class DeleteTranscodingTempFilesTestCase(TestCase):
    """Test the `delete_transcoding_temp_files` function."""

    @patch("marsha.core.utils.transcode.delete_temp_file")
    def test_transcoding_delete_transcoding_temp_files(self, mock_delete_temp_file):
        """The temp path derived from the video directory should be deleted."""
        video_id = uuid4()
        transcoded_video = TranscodedVideoFactory(
            directory=f"vod/{video_id}/video/1698941501"
        )
        # Same path as the video directory, with the "vod" base swapped
        # for "tmp".
        expected_tmp_path = f"tmp/{video_id}/video/1698941501"

        delete_transcoding_temp_files()

        mock_delete_temp_file.assert_called_once_with(
            transcoded_video, expected_tmp_path
        )

    @patch("marsha.core.utils.transcode.delete_temp_file")
    def test_transcoding_delete_transcoding_temp_files_all(self, mock_delete_temp_file):
        """Every existing transcoded video should be processed."""
        batch_size = 15
        TranscodedVideoFactory.create_batch(batch_size)

        delete_transcoding_temp_files()

        # One deletion call is expected per created video.
        self.assertEqual(mock_delete_temp_file.call_count, batch_size)
23 changes: 13 additions & 10 deletions src/backend/marsha/core/tests/utils/test_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,13 @@ def test_transcription_ended_callback(self):

timed_text_track = video.timedtexttracks.get()
self.assertEqual(timed_text_track.language, language)
self.assertEqual(timed_text_track.mode, TimedTextTrack.TRANSCRIPT)
self.assertEqual(timed_text_track.mode, TimedTextTrack.SUBTITLE)
self.assertEqual(timed_text_track.upload_state, defaults.READY)
self.assertEqual(timed_text_track.extension, "vtt")

video.refresh_from_db()
self.assertTrue(video.should_use_subtitle_as_transcript)

ttt_path = timed_text_track.get_videos_storage_prefix()
self.assertTrue(
video_storage.exists(f"{ttt_path}/source.{timed_text_track.extension}")
Expand Down Expand Up @@ -114,39 +117,39 @@ def test_transcript_video_no_video(self, mock_launch_video_transcript):
mock_launch_video_transcript.delay.assert_not_called()

@patch.object(transcript_utils, "launch_video_transcript")
def test_transcript_video_already_transcript(self, mock_launch_video_transcript):
def test_transcript_video_already_subtitle(self, mock_launch_video_transcript):
"""
Should not call the launch_video_transcript function
if the video already has a transcript.
if the video already has a subtitle.
"""
timed_text_track = TimedTextTrackFactory(
video=VideoFactory(upload_state=defaults.READY),
language=settings.LANGUAGES[0][0],
mode=TimedTextTrack.TRANSCRIPT,
mode=TimedTextTrack.SUBTITLE,
)

with self.assertRaises(transcript_utils.TranscriptError) as context:
transcript_utils.transcript(timed_text_track.video)

self.assertEqual(
str(context.exception),
f"A transcript already exists for video {timed_text_track.video.id}",
f"A subtitle already exists for video {timed_text_track.video.id}",
)
mock_launch_video_transcript.delay.assert_not_called()

@patch.object(transcript_utils, "launch_video_transcript")
def test_transcript_video_already_subtitle(self, mock_launch_video_transcript):
def test_transcript_video_already_transcript(self, mock_launch_video_transcript):
"""
Should call the launch_video_transcript function
if the video has a subtitle.
if the video has a transcript.
"""
timed_text_track = TimedTextTrackFactory(
video=VideoFactory(
upload_state=defaults.READY,
transcode_pipeline=defaults.PEERTUBE_PIPELINE,
),
language=settings.LANGUAGES[0][0],
mode=TimedTextTrack.SUBTITLE,
mode=TimedTextTrack.TRANSCRIPT,
)

transcript_utils.transcript(timed_text_track.video)
Expand All @@ -159,7 +162,7 @@ def test_transcript_video_already_subtitle(self, mock_launch_video_transcript):
self.assertEqual(timed_text_track.video.timedtexttracks.count(), 2)
self.assertTrue(
timed_text_track.video.timedtexttracks.filter(
mode=TimedTextTrack.TRANSCRIPT
mode=TimedTextTrack.SUBTITLE
).exists()
)

Expand Down Expand Up @@ -190,7 +193,7 @@ def test_transcript_video_already_closed_caption(
self.assertEqual(timed_text_track.video.timedtexttracks.count(), 2)
self.assertTrue(
timed_text_track.video.timedtexttracks.filter(
mode=TimedTextTrack.TRANSCRIPT
mode=TimedTextTrack.SUBTITLE
).exists()
)

Expand Down
27 changes: 24 additions & 3 deletions src/backend/marsha/core/utils/transcode.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
""" Utils related to transcoding """

from django_peertube_runner_connector.models import Video as TranscodedVideo, VideoState

from marsha.core.defaults import ERROR, PEERTUBE_PIPELINE, READY
from django_peertube_runner_connector.utils.files import delete_temp_file

from marsha.core.defaults import (
ERROR,
PEERTUBE_PIPELINE,
READY,
TMP_VIDEOS_STORAGE_BASE_DIRECTORY,
VOD_VIDEOS_STORAGE_BASE_DIRECTORY,
)
from marsha.core.models.video import Video
from marsha.core.utils.time_utils import to_datetime


def transcoding_ended_callback(transcoded_video: TranscodedVideo):
"""
Callback used when the a Peertube runnner has finished
Callback used when a Peertube runner has finished
to transcode a video.

Parameters
Expand All @@ -23,6 +30,11 @@ def transcoding_ended_callback(transcoded_video: TranscodedVideo):
uploaded_on = directory[-1]
video_id = directory[-3]
video = Video.objects.get(pk=video_id)
tmp_filename = transcoded_video.directory.replace(
VOD_VIDEOS_STORAGE_BASE_DIRECTORY, TMP_VIDEOS_STORAGE_BASE_DIRECTORY
)

delete_temp_file(transcoded_video, tmp_filename)

if transcoded_video.state == VideoState.TRANSCODING_FAILED:
video.update_upload_state(ERROR, None)
Expand All @@ -39,3 +51,12 @@ def transcoding_ended_callback(transcoded_video: TranscodedVideo):
to_datetime(uploaded_on),
**{"resolutions": resolutions},
)


def delete_transcoding_temp_files():
    """
    Delete the transcoding temp files of every transcoded video.

    For each `TranscodedVideo` in the database, the temp file name is derived
    from the video `directory` by replacing the VOD storage base directory
    with the TMP storage base directory, then `delete_temp_file` is called
    with the video and that name.
    """
    vod_base = VOD_VIDEOS_STORAGE_BASE_DIRECTORY
    tmp_base = TMP_VIDEOS_STORAGE_BASE_DIRECTORY

    for video in TranscodedVideo.objects.all():
        # NOTE(review): assuming `directory` is a str, `replace` swaps every
        # occurrence of the VOD base, not only a leading one.
        delete_temp_file(video, video.directory.replace(vod_base, tmp_base))
10 changes: 5 additions & 5 deletions src/backend/marsha/core/utils/transcript_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,11 @@ def transcript(video):
TimedTextTrack.objects.create(
video=video,
language=settings.LANGUAGES[0][0],
mode=TimedTextTrack.TRANSCRIPT,
mode=TimedTextTrack.SUBTITLE,
upload_state=defaults.PROCESSING,
)
except IntegrityError as e:
raise TranscriptError(
f"A transcript already exists for video {video.id}"
) from e
raise TranscriptError(f"A subtitle already exists for video {video.id}") from e

domain = (
settings.TRANSCODING_CALLBACK_DOMAIN
Expand Down Expand Up @@ -78,14 +76,16 @@ def transcription_ended_callback(

timed_text_track, created = video.timedtexttracks.get_or_create(
upload_state=defaults.PROCESSING,
mode=TimedTextTrack.TRANSCRIPT,
mode=TimedTextTrack.SUBTITLE,
defaults={
"language": language,
"extension": "vtt",
"uploaded_on": to_datetime(uploaded_on),
"upload_state": defaults.READY,
},
)
video.should_use_subtitle_as_transcript = True
video.save()
if not created:
timed_text_track.upload_state = defaults.READY
timed_text_track.uploaded_on = to_datetime(uploaded_on)
Expand Down
2 changes: 1 addition & 1 deletion src/backend/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ install_requires =
django-redis==5.4.0
django-safedelete==1.4.0
django-storages==1.14.3
django-peertube-runner-connector==0.9.0
django-peertube-runner-connector==0.10.0
django-waffle==4.1.0
Django<5
djangorestframework==3.15.2
Expand Down
Loading