# Source code for pycharmers.cli.video_of_lyric

#coding: utf-8
import os
import cv2
import sys
import json
import argparse
import subprocess
import numpy as np
from PIL import Image

from ..utils._colorings import toBLUE, toGREEN, toACCENT
from ..utils.argparse_utils import ListParamProcessorCreate
from ..utils.audio_utils import synthesize_audio
from ..utils.generic_utils import now_str
from ..utils.monitor_utils import ProgressMonitor
from ..utils.pil_utils import draw_text_in_pil

def _page_windows(text_data, alpha_range, span):
    """Return one ``[page_index, start, end]`` display window per page of lyrics."""
    windows = []
    for idx, page in enumerate(text_data):
        # Gather every timestamp of the page first so that ``alpha_range`` is
        # subtracted exactly once, and lines without "seconds" are tolerated.
        secs = [s for line in page for s in line.get("seconds", [])]
        if secs:
            start = min(secs) - alpha_range
            end = max(0, max(secs))
        else:
            # A page without timestamps never becomes active.
            start, end = 1e16, 0
        windows.append([idx, start, end + alpha_range + span])
    return windows


def _fade_color(start, sec, textRGB, alpha_range):
    """Return ``textRGB`` with an alpha ramping from 0 to 255 over ``alpha_range`` seconds after ``start``."""
    if alpha_range > 0:
        alpha = int((sec - start) / alpha_range * 255)
    else:
        # No fade requested: the character switches on at its start time.
        alpha = 255 if sec >= start else 0
    return (*textRGB[:3], min(255, max(0, alpha)))


def _resolve_pos(line, name, default):
    """Return ``line[name]``, treating a missing value or ``-1`` as "use ``default``"."""
    value = line.get(name, -1)
    return default if value == -1 else value


def video_of_lyric(argv=sys.argv[1:]):
    """Create a lyric Video.

    The video is written next to the json file as ``<json root>_<now_str()>.mp4``.
    Every value that can be given both on the command line and in the ``"kwargs"``
    section of the json file is resolved in this order: command line, json, default.

    Args:
        json (str)            : Path to parameter json file.
        --ttfontname (str)    : A filename or file-like object containing a TrueType font. If the file is not found in this filename, the loader may also search in other directories, such as the ``fonts/`` directory on Windows or ``/Library/Fonts/`` , ``/System/Library/Fonts/`` and ``~/Library/Fonts/`` on macOS.
        --margin (int)        : The margin size.
        --mode (str)          : Optional mode to use for color values.
        --fontsize (int)      : The font size.
        --fontwidth (int)     : The font width.
        --fontheight (int)    : The font height.
        --img-size (tuple)    : The image size.
        --bgRGB (tuple)       : The color of background image. (RGB)
        --textRGB (tuple)     : The color of text. (RGB)
        --alpha-range (float) : How many seconds to set alpha to 1 (maximum).
        --fps (float)         : The video fps.
        --span (int)          : The span between lyrics.
        --audio (str)         : The audio path. If given, it is synthesized with the created video.

    Raises:
        SystemExit: If the command line arguments are invalid (e.g. ``--fps`` is not positive).

    .. code-block:: python

        >>> # Create a json.
        >>> import numpy as np
        >>> def func(word, start, end, indent='\\t'*4):
        ...     print(f'{indent}"words": "{word}",')
        ...     print(f'{indent}"seconds": {[round(e,3) for e in np.linspace(start, end, len(word))]},')

    .. code-block:: python

        >>> import json
        >>> from pycharmers.utils import dumps_json
        >>> with open("era-it-doo/era-it.json") as f:
        ...     data = json.load(f)
        >>> print(dumps_json(data))
        {
          "kwargs": {
            "ttfontname": "/Users/iwasakishuto/Library/Fonts/851MkPOP_002.ttf",
            "img_size": [
              360,
              640
            ],
            "fontsize": 38,
            "fontwidth": 30,
            "margin": 10,
            "bgRGB": [
              0,
              0,
              0,
              255
            ],
            "mode": "RGBA",
            "ret_position": "line"
          },
          "texts": [
            [{"words":"1日今日も、","seconds":[2.4, 2.624, 2.848, 3.072, 3.296, 3.52],"x":-1,"y":50},
             {"words":"こなした労働、","seconds":[4.1, 4.317, 4.533, 4.75, 4.967, 5.183, 5.4],"x":-1,"y":-1},
             {"words":"辛かったけど","seconds":[5.7, 5.96, 6.22, 6.48, 6.74, 7.0],"x":-1,"y":-1},
             {"words":"本当がんばったじゃん","seconds":[7.0, 7.222, 7.444, 7.667, 7.889, 8.111, 8.333, 8.556, 8.778, 9.0],"x":-1,"y":-1},
             {"words":"1日今日も、","seconds":[9.1, 9.38, 9.66, 9.94, 10.22, 10.5],"x":-1,"y":260},
             {"words":"いちいちどうこう、","seconds":[10.8, 10.962, 11.125, 11.288, 11.45, 11.612, 11.775, 11.938, 12.1],"x":-1,"y":-1},
             {"words":"いわれたけど","seconds":[12.3, 12.52, 12.74, 12.96, 13.18, 13.4],"x":-1,"y":-1},
             {"words":"本当にがんばったじゃん","seconds":[13.4, 13.59, 13.78, 13.97, 14.16, 14.35, 14.54, 14.73, 14.92, 15.11, 15.3],"x":-1,"y":-1}]
          ]
        }

    Note:
        When you run from the command line, execute as follows::

            $ video_of_lyric dodo-era-it.json --audio dodo-era-it.mp4[.mp3]

    +--------------------------------------------+
    |                   Sample                   |
    +============================================+
    | .. image:: _images/cli.video_of_lyric.gif  |
    +--------------------------------------------+
    """
    parser = argparse.ArgumentParser(prog="video_of_lyric", description="Create a lyric Video.", add_help=True)
    parser.add_argument("json", type=str, help="Path to parameter json file.")
    parser.add_argument("--ttfontname", type=str, default=None, help="A filename or file-like object containing a TrueType font. If the file is not found in this filename, the loader may also search in other directories, such as the ``fonts/`` directory on Windows or ``/Library/Fonts/`` , ``/System/Library/Fonts/`` and ``~/Library/Fonts/`` on macOS.")
    parser.add_argument("--margin", type=int, default=None, help="The margin size.")
    parser.add_argument("--mode", type=str, default=None, help="Optional mode to use for color values.")
    parser.add_argument("--fontsize", type=int, default=None, help="The font size.")
    parser.add_argument("--fontwidth", type=int, default=None, help="The font width.")
    parser.add_argument("--fontheight", type=int, default=None, help="The font height.")
    parser.add_argument("--img-size", action=ListParamProcessorCreate(type=int), default=None, help="The image size.")
    parser.add_argument("--bgRGB", action=ListParamProcessorCreate(type=int), default=None, help="The color of background image. (RGB)")
    parser.add_argument("--textRGB", action=ListParamProcessorCreate(type=int), default=None, help="The color of text. (RGB)")
    parser.add_argument("--alpha-range", type=float, help="How many seconds to set alpha to 1 (maximum).", default=1.)
    parser.add_argument("--fps", type=float, help="The video fps.", default=30.)
    parser.add_argument("--span", type=int, help="The span between lyrics", default=None)
    parser.add_argument("--audio", type=str, help="The audio path.", default=None)
    args = parser.parse_args(argv)
    if args.fps <= 0:
        # A non-positive fps would divide by zero or never reach the end of the video.
        parser.error("--fps must be a positive number.")

    json_path = args.json
    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
    with open(json_path, mode="r", encoding="utf-8") as f:
        data = json.load(f)

    args_kwargs = dict(args._get_kwargs())
    data_kwargs = data.get("kwargs", {})

    def get_kwargs(name, default=None):
        """Resolve ``name`` from the command line, then the json "kwargs", then ``default``."""
        value = args_kwargs.get(name)
        # Compare against None so that explicit falsy values (e.g. ``--margin 0``) win.
        if value is not None:
            return value
        return data_kwargs.get(name, default)

    img_size = tuple(get_kwargs("img_size", default=[360, 640])[:2])
    bgRGB = tuple(get_kwargs("bgRGB", default=(0, 0, 0, 255)))
    textRGB = tuple(get_kwargs("textRGB", default=(255, 255, 255, 255)))
    mode = get_kwargs("mode", default="RGBA")
    margin = get_kwargs("margin", default=10)
    fontsize = get_kwargs("fontsize", default=40)
    fontwidth = get_kwargs("fontwidth", default=None)
    fontheight = get_kwargs("fontheight", default=None)
    span = get_kwargs("span", default=1)
    audio_path = get_kwargs("audio", default=None)
    alpha_range = args.alpha_range
    fps = args.fps
    spf = 1 / fps  # seconds per frame

    # Keyword arguments forwarded unchanged to every ``draw_text_in_pil`` call.
    kwargs = {
        "ttfontname": get_kwargs("ttfontname", default=""),
        "img_size": img_size,
        "fontsize": fontsize,
        "fontwidth": fontwidth,
        "fontheight": fontheight,
        "margin": margin,
        "bgRGB": bgRGB,
        "mode": mode,
    }

    text_data = data.get("texts", [[]])
    num_texts = len(text_data)
    sec_filter = _page_windows(text_data, alpha_range, span)
    # ``default=0`` keeps an empty "texts" list from crashing ``max``.
    duration = max((window[2] for window in sec_filter), default=0) + alpha_range + 1
    num_frames = int(duration * fps)

    params = {
        "duration": duration,
        "alpha range": alpha_range,
        "FPS": fps,
        "span": span,
        "audio": audio_path,
    }
    params.update(kwargs)
    print("[Parameters]")
    for k, v in params.items():
        print(f"* {toACCENT(k)}: {toGREEN(v)}")

    root, _ = os.path.splitext(json_path)
    video_path = f"{root}_{now_str()}.mp4"
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out_video = cv2.VideoWriter(video_path, fourcc, fps, img_size)
    monitor = ProgressMonitor(max_iter=num_frames, barname="Editing")
    it = sec = 0
    page_no = 0  # 1-based number of the most recently displayed page (0: none yet)
    try:
        while True:
            it += 1
            sec += spf
            bg = Image.new(mode=mode, size=img_size, color=bgRGB)
            # Only the first page whose window contains ``sec`` is drawn.
            active = next((w[0] for w in sec_filter if w[1] <= sec < w[2]), None)
            if active is not None:
                page_no = active + 1
                x = y = margin
                for line in text_data[active]:
                    words = line.get("words", "")
                    start_secs = line.get("seconds", [])
                    x = _resolve_pos(line, name="x", default=x)
                    y = _resolve_pos(line, name="y", default=y)
                    # One timestamp per character; each character fades in on its own.
                    for w, start in zip(words, start_secs):
                        fc = _fade_color(start, sec, textRGB, alpha_range)
                        bg, (x, _) = draw_text_in_pil(text=w, img=bg, ret_position="word", textRGB=fc, x=x, y=y, **kwargs)
                    y += fontsize
                    x = margin
            # PIL yields RGB, whereas ``cv2.VideoWriter`` expects BGR frames.
            frame = cv2.cvtColor(np.asarray(bg.convert("RGB")), cv2.COLOR_RGB2BGR)
            out_video.write(frame)
            monitor.report(it, sec=f"{sec:.2f}/{duration}", text=f"{page_no}/{num_texts}")
            if sec > duration:
                break
    finally:
        # Always finalize the container, even when rendering fails midway.
        out_video.release()
    monitor.remove()
    print(f"{toBLUE(video_path)} (No Sound) is created.")
    if audio_path is not None:
        synthesize_audio(video_path=video_path, audio_path=audio_path)