Source code for pycharmers.cli.revise_text

#coding: utf-8
import os
import re
import sys
import argparse
import docx
import pandas as pd

from ..utils._colorings import toRED, toGREEN
from ..utils.generic_utils import now_str

[docs]def revise_text(argv=sys.argv[1:]):
    """Revise word file.

    Args:
        -W/--word (str)    : Path to the word file.
        -E/--excel (str)   : Path to the excel file.
        --sheet-name (str) : SheetName for the Excel.
        --NG (str)         : The column name which indicates NG words. Defaults to ``"NG"``
        --OK (str)         : The column name which indicates OK words. Defaults to ``"OK"``

    Note:
        When you run from the command line, execute as follows::
        
        $ revise-text --word sample.docx --excel wordlist.xlsx        
    """
    parser = argparse.ArgumentParser(prog="revise-text", add_help=True)
    parser.add_argument("-W", "--word",  type=str, help="Path to the word file.")
    parser.add_argument("-E", "--excel", type=str, help="Path to the excel file.")
    parser.add_argument("--sheet-name",  type=str, help="SheetName for the Excel.")
    parser.add_argument("--NG", type=str, default="NG", help="The column name which indicates NG words. Defaults to ``NG``")
    parser.add_argument("--OK", type=str, default="OK", help="The column name which indicates OK words. Defaults to ``OK``")
    args = parser.parse_args(argv)

    df_wordlist = pd.read_excel(args.excel, sheet_name=args.sheet_name).fillna("")
    NG2OK = dict(df_wordlist[[args.NG, args.OK]].values)

    def repl_create(para_no:int, para_digit:int, text_digit:int) -> callable:
        """Replace word while printing the logs.

        Args:
            para_no (int)    : The paragraph number.
            para_digit (int) : The digit for the number of paragraphs.
            text_digit (int) : The maximum digit for the number of texts in a paragraph.

        Returns:
            callable: Replacement function for ``re.sub``
        """
        def repl(match):
            ng_word = match.group()
            ok_word = NG2OK[ng_word]
            print(f"\t[Para.{para_no:>0{para_digit}}] ({match.start():>0{text_digit}}-{match.end():>0{text_digit}}) Reveised {toRED(ng_word)} -> {toGREEN(ok_word)}")
            return ok_word
        return repl

    doc = docx.Document(args.word)
    para_digit = len(str(len(doc.paragraphs)))
    text_digit = len(str(max([len(t) for t in doc.paragraphs])))
    for i,para in enumerate(doc.paragraphs):
        for ng in NG2OK.keys():
            para.text = re.sub(pattern=ng, repl=repl_create(para_no=i, para_digit=para_digit, text_digit=text_digit), string=para.text)
    doc.save(now_str().join(os.path.splitext(args.word)))
Source code for pycharmers.cli.revise_text

Other contents

Social link

Table of Contents