Skip to content

Commit

Permalink
player/loadfile: match language and subcodes
Browse files Browse the repository at this point in the history
  • Loading branch information
kasper93 committed May 6, 2024
1 parent a742d37 commit b72f414
Show file tree
Hide file tree
Showing 7 changed files with 372 additions and 23 deletions.
16 changes: 8 additions & 8 deletions DOCS/man/options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ Track Selection
---------------

``--alang=<languagecode[,languagecode,...]>``
Specify a priority list of audio languages to use. Different container
formats employ different language codes. DVDs use ISO 639-1 two-letter
language codes, Matroska, MPEG-TS and NUT use ISO 639-2 three-letter
language codes, while OGM uses a free-form identifier. See also ``--aid``.
Specify a prioritized list of audio languages to use, as IETF language tags.
Equivalent ISO 639-1 two-letter and ISO 639-2 three-letter codes are treated
the same. The first tag in the list that matches the track's language in the
file will be used. A track that matches more subtags will be preferred over
one that matches fewer. See also ``--aid``.

This is a string list option. See `List Options`_ for details.

Expand All @@ -20,10 +21,7 @@ Track Selection
audio.

``--slang=<languagecode[,languagecode,...]>``
Specify a priority list of subtitle languages to use. Different container
formats employ different language codes. DVDs use ISO 639-1 two letter
language codes, Matroska uses ISO 639-2 three letter language codes while
OGM uses a free-form identifier. See also ``--sid``.
Equivalent to ``--alang``, for subtitle tracks.

This is a string list option. See `List Options`_ for details.

Expand All @@ -33,6 +31,8 @@ Track Selection
a DVD and falls back on English if Hungarian is not available.
- ``mpv --slang=jpn example.mkv`` plays a Matroska file with Japanese
subtitles.
- ``mpv --slang=pt-BR example.mkv`` plays a Matroska file with Brazilian
Portuguese subtitles if available, and otherwise any Portuguese subtitles.

``--vlang=<...>``
Equivalent to ``--alang`` and ``--slang``, for video tracks.
Expand Down
1 change: 1 addition & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ sources = files(
'misc/dispatch.c',
'misc/io_utils.c',
'misc/json.c',
'misc/language.c',
'misc/natural_sort.c',
'misc/node.c',
'misc/path_utils.c',
Expand Down
297 changes: 297 additions & 0 deletions misc/language.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/

#include "language.h"

#include <limits.h>
#include <stdint.h>

#include "common/common.h"
#include "misc/bstr.h"

// Lookup table that maps alternative spellings of a language code onto one
// three-letter code, so that e.g. "de", "deu" and "ger" can be compared as
// equal. Codes that have no entry here are compared as-is by the caller.
//
// The table must stay sorted by `match` in case-insensitive byte order:
// canonicalize() looks entries up with bsearch().
static const struct lang {
// Code to look up: two or three letters plus the terminating NUL.
char match[4];
// Three-letter code that `match` is folded into.
char canonical[4];
} langmap[] = {
{"aa", "aar"},
{"ab", "abk"},
{"ae", "ave"},
{"af", "afr"},
{"ak", "aka"},
{"am", "amh"},
{"an", "arg"},
{"ar", "ara"},
{"as", "asm"},
{"av", "ava"},
{"ay", "aym"},
{"az", "aze"},
{"ba", "bak"},
{"be", "bel"},
{"bg", "bul"},
{"bh", "bih"},
{"bi", "bis"},
{"bm", "bam"},
{"bn", "ben"},
{"bo", "tib"},
{"bod", "tib"},
{"br", "bre"},
{"bs", "bos"},
{"ca", "cat"},
{"ce", "che"},
{"ces", "cze"},
{"ch", "cha"},
{"co", "cos"},
{"cr", "cre"},
{"cs", "cze"},
{"cu", "chu"},
{"cv", "chv"},
{"cy", "wel"},
{"cym", "wel"},
{"da", "dan"},
{"de", "ger"},
{"deu", "ger"},
{"dv", "div"},
{"dz", "dzo"},
{"ee", "ewe"},
{"el", "gre"},
{"ell", "gre"},
{"en", "eng"},
{"eo", "epo"},
{"es", "spa"},
{"et", "est"},
{"eu", "baq"},
{"eus", "baq"},
{"fa", "per"},
{"fas", "per"},
{"ff", "ful"},
{"fi", "fin"},
{"fj", "fij"},
{"fo", "fao"},
{"fr", "fre"},
{"fra", "fre"},
{"fy", "fry"},
{"ga", "gle"},
{"gd", "gla"},
{"gl", "glg"},
{"gn", "grn"},
{"gu", "guj"},
{"gv", "glv"},
{"ha", "hau"},
{"he", "heb"},
{"hi", "hin"},
{"ho", "hmo"},
{"hr", "hrv"},
{"ht", "hat"},
{"hu", "hun"},
{"hy", "arm"},
{"hye", "arm"},
{"hz", "her"},
{"ia", "ina"},
{"id", "ind"},
{"ie", "ile"},
{"ig", "ibo"},
{"ii", "iii"},
{"ik", "ipk"},
{"io", "ido"},
{"is", "ice"},
{"isl", "ice"},
{"it", "ita"},
{"iu", "iku"},
{"ja", "jpn"},
{"jv", "jav"},
{"ka", "geo"},
{"kat", "geo"},
{"kg", "kon"},
{"ki", "kik"},
{"kj", "kua"},
{"kk", "kaz"},
{"kl", "kal"},
{"km", "khm"},
{"kn", "kan"},
{"ko", "kor"},
{"kr", "kau"},
{"ks", "kas"},
{"ku", "kur"},
{"kv", "kom"},
{"kw", "cor"},
{"ky", "kir"},
{"la", "lat"},
{"lb", "ltz"},
{"lg", "lug"},
{"li", "lim"},
{"ln", "lin"},
{"lo", "lao"},
{"lt", "lit"},
{"lu", "lub"},
{"lv", "lav"},
{"mg", "mlg"},
{"mh", "mah"},
{"mi", "mao"},
{"mk", "mac"},
{"mkd", "mac"},
{"ml", "mal"},
{"mn", "mon"},
{"mr", "mar"},
{"mri", "mao"},
{"ms", "may"},
{"msa", "may"},
{"mt", "mlt"},
{"my", "bur"},
{"mya", "bur"},
{"na", "nau"},
{"nb", "nob"},
{"nd", "nde"},
{"ne", "nep"},
{"ng", "ndo"},
{"nl", "dut"},
{"nld", "dut"},
{"nn", "nno"},
{"no", "nor"},
{"nr", "nbl"},
{"nv", "nav"},
{"ny", "nya"},
{"oc", "oci"},
{"oj", "oji"},
{"om", "orm"},
{"or", "ori"},
{"os", "oss"},
{"pa", "pan"},
{"pi", "pli"},
{"pl", "pol"},
{"ps", "pus"},
{"pt", "por"},
{"qu", "que"},
{"rm", "roh"},
{"rn", "run"},
{"ro", "rum"},
{"ron", "rum"},
{"ru", "rus"},
{"rw", "kin"},
{"sa", "san"},
{"sc", "srd"},
{"sd", "snd"},
{"se", "sme"},
{"sg", "sag"},
{"si", "sin"},
{"sk", "slo"},
{"sl", "slv"},
{"slk", "slo"},
{"sm", "smo"},
{"sn", "sna"},
{"so", "som"},
{"sq", "alb"},
{"sqi", "alb"},
{"sr", "srp"},
{"ss", "ssw"},
{"st", "sot"},
{"su", "sun"},
{"sv", "swe"},
{"sw", "swa"},
{"ta", "tam"},
{"te", "tel"},
{"tg", "tgk"},
{"th", "tha"},
{"ti", "tir"},
{"tk", "tuk"},
{"tl", "tgl"},
{"tn", "tsn"},
{"to", "ton"},
{"tr", "tur"},
{"ts", "tso"},
{"tt", "tat"},
{"tw", "twi"},
{"ty", "tah"},
{"ug", "uig"},
{"uk", "ukr"},
{"ur", "urd"},
{"uz", "uzb"},
{"ve", "ven"},
{"vi", "vie"},
{"vo", "vol"},
{"wa", "wln"},
{"wo", "wol"},
{"xh", "xho"},
{"yi", "yid"},
{"yo", "yor"},
{"za", "zha"},
{"zh", "chi"},
{"zho", "chi"},
{"zu", "zul"},
};

// bsearch() comparator: orders the searched-for bstr against the `match`
// code of one langmap entry, ignoring case.
static int lang_compare(const void *key, const void *lang)
{
    const bstr *needle = key;
    const struct lang *entry = lang;

    return bstrcasecmp0(*needle, entry->match);
}

// Look `lang` up in langmap and return the code it is folded into.
// Returns an empty bstr (len == 0) when `lang` has no entry in the table.
static bstr canonicalize(bstr lang)
{
    const struct lang *found = bsearch(&lang, langmap, MP_ARRAY_SIZE(langmap),
                                       sizeof(langmap[0]), lang_compare);
    if (!found)
        return bstr0(NULL);

    return bstr0(found->canonical);
}

// Score deducted for every subtag of the track's language tag that a
// preference entry does not confirm, either because the entry's subtag at
// that position differs or because the entry has no subtag there.
enum { LANG_SUBTAG_PENALTY = 1000 };

// Score a single preference entry against the already split track tag.
//
// pref:         one entry of the user's preference list, e.g. "pt-BR"
// primary:      first subtag of the track tag, canonicalized if possible
// lang_parts:   all '-'-separated subtags of the track tag
// lang_parts_n: number of elements in lang_parts (>= 1)
// idx:          position of `pref` in the preference list
//
// Returns a value <= 0 if the primary subtags differ, otherwise
// INT_MAX - idx minus the accumulated subtag penalties.
static int score_lang_pref(bstr pref, bstr primary, const bstr *lang_parts,
                           int lang_parts_n, int idx)
{
    int part = 0;
    int score = 0;

    while (pref.len) {
        bstr s = bstr_split(pref, "-", &pref);

        if (!part) {
            // Primary subtags are compared in canonical form so that
            // equivalent codes of different length match each other.
            bstr c = canonicalize(s);
            if (bstrcasecmp(primary, c.len ? c : s))
                break;
            score = INT_MAX - idx;
            part++;
            continue;
        }

        // Subtags of the entry beyond those of the track are ignored.
        if (part >= lang_parts_n)
            break;

        if (bstrcasecmp(lang_parts[part], s))
            score -= LANG_SUBTAG_PENALTY;

        part++;
    }

    // Track subtags the entry never reached count as mismatches too. If the
    // primary subtag did not match, this leaves the score negative.
    return score - (lang_parts_n - part) * LANG_SUBTAG_PENALTY;
}

// Rate how well the language tag `lang` of a track matches the prioritized
// preference list `langs`.
//
// langs: NULL-terminated array of language tags, most preferred first.
//        May be NULL, which is treated like an empty list.
// lang:  language tag of the track, split into subtags at '-'. May be NULL.
//
// Returns 0 if there is no match (including NULL or empty `lang`), otherwise
// a positive score where numerically higher means a better match. The best
// score over all entries of `langs` is returned.
int mp_match_lang(char **langs, const char *lang)
{
    if (!lang)
        return 0;

    void *ta_ctx = talloc_new(NULL);
    int lang_parts_n = 0;
    bstr *lang_parts = NULL;
    bstr rest = bstr0(lang);
    while (rest.len) {
        bstr s = bstr_split(rest, "-", &rest);
        MP_TARRAY_APPEND(ta_ctx, lang_parts, lang_parts_n, s);
    }

    int best_score = 0;
    if (lang_parts_n) {
        // The track's primary subtag is the same for every preference entry,
        // so canonicalize it once instead of once per entry.
        bstr primary = canonicalize(lang_parts[0]);
        if (!primary.len)
            primary = lang_parts[0];

        for (int idx = 0; langs && langs[idx]; idx++) {
            int score = score_lang_pref(bstr0(langs[idx]), primary,
                                        lang_parts, lang_parts_n, idx);
            best_score = MPMAX(best_score, score);
        }
    }

    talloc_free(ta_ctx);
    return best_score;
}
2 changes: 2 additions & 0 deletions misc/language.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#ifndef MP_LANGUAGE_H
#define MP_LANGUAGE_H

// Rate how well the track language tag `lang` matches the prioritized,
// NULL-terminated list of language tags `langs` (most preferred first).
// Both `langs` and `lang` may be NULL. Tags are split into subtags at '-'
// and compared case-insensitively.
// Result numerically higher => better match. 0 == no match.
int mp_match_lang(char **langs, const char *lang);
char **mp_get_user_langs(void);

#endif /* MP_LANGUAGE_H */
18 changes: 3 additions & 15 deletions player/loadfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -449,18 +449,6 @@ void add_demuxer_tracks(struct MPContext *mpctx, struct demuxer *demuxer)
add_stream_track(mpctx, demuxer, demux_get_stream(demuxer, n));
}

// Return a score for `lang` against the NULL-terminated priority list
// `langs`: INT_MAX minus the index of the first entry that equals `lang`
// ignoring case, or 0 if `lang` is NULL or no entry matches.
static int match_lang(char **langs, const char *lang)
{
    if (!lang || !langs)
        return 0;

    int pos = 0;
    while (langs[pos]) {
        if (!strcasecmp(langs[pos], lang))
            return INT_MAX - pos;
        pos++;
    }
    return 0;
}

/* Get the track wanted by the user.
* tid is the track ID requested by the user (-2: deselect, -1: default)
* lang is a string list, NULL is same as empty list
Expand Down Expand Up @@ -504,7 +492,7 @@ static bool compare_track(struct track *t1, struct track *t2, char **langs, bool
(t2->program_id == preferred_program))
return t1->program_id == preferred_program;
}
int l1 = match_lang(langs, t1->lang), l2 = match_lang(langs, t2->lang);
int l1 = mp_match_lang(langs, t1->lang), l2 = mp_match_lang(langs, t2->lang);
if (!os_langs && l1 != l2)
return l1 > l2;
if (forced)
Expand Down Expand Up @@ -619,10 +607,10 @@ struct track *select_default_track(struct MPContext *mpctx, int order,
bool audio_matches = audio_lang && track->lang && !strcasecmp(audio_lang, track->lang);
bool forced = track->forced_track && (opts->subs_fallback_forced == 2 ||
(audio_matches && opts->subs_fallback_forced == 1));
bool lang_match = !os_langs && match_lang(langs, track->lang) > 0;
bool lang_match = !os_langs && mp_match_lang(langs, track->lang) > 0;
bool subs_fallback = (track->is_external && !track->no_default) || opts->subs_fallback == 2 ||
(opts->subs_fallback == 1 && track->default_track);
bool subs_matching_audio = (!match_lang(langs, audio_lang) || opts->subs_with_matching_audio == 2 ||
bool subs_matching_audio = (!mp_match_lang(langs, audio_lang) || opts->subs_with_matching_audio == 2 ||
(opts->subs_with_matching_audio == 1 && track->forced_track));
if (subs_matching_audio && ((!pick && (forced || lang_match || subs_fallback)) ||
(pick && compare_track(track, pick, langs, os_langs, forced, mpctx->opts, preferred_program))))
Expand Down

0 comments on commit b72f414

Please sign in to comment.