# cancionero-web/src/latex_scanner.py
#
# 337 lines
# 16 KiB
# Python

import argparse
import os
import re
from django.conf import settings
from django.template import Engine, Context
from os.path import join
from pathlib import Path
from audio_scanner import find_audios
from model import Chord, Line, Song, Verse
def mkdir(path: str) -> None:
    '''Ensures that the directory at the given path exists.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    :param path: The directory to create.
    :raises FileExistsError: If the path exists but is not a directory.
    '''
    # exist_ok avoids the check-then-create race of testing os.path.exists first
    os.makedirs(path, exist_ok=True)
# Note: re.match only matches at the very start of the string (as if the
# pattern began with \A), whereas re.search looks for a match anywhere in it.
def read_property(text: str, key: str) -> str | None:
'''A parser for \\beginsong attributes.'''
if text is None:
return None
match = re.search(key + "={(.*?)}", text)
return match.group(1) if match else None
def extra_put(extra: list, index: int, the_type: str, data: dict|None = None):
'''Adds the given data (if any) to the extra list, \
at the given index, and tagged with the given the_type.'''
payload = {'type': the_type, 'data': data} if data else {'type': the_type}
if index not in extra:
extra[index] = []
extra[index].append(payload)
class SongLoader:
'''Parses LaTeX files to build a collection of song objects.'''
def __init__(self, latex_file: str, audio_dir: str | None = None):
'''Initializes and populates a LaTeX reader.'''
self.index: int = 1
self.category: str | None = None
self.categories: list[str] = []
self.songs: list[Song] = []
if audio_dir:
self.audio_dir = audio_dir
self.scan(latex_file)
def scan(self, latex_file: str) -> None:
'''Reads through an index file and scans each song, with the same numbers.
:param latex_file: The main latex file, which includes songs via \\input.'''
main_file = open(latex_file, 'r')
for line in main_file.readlines():
# Remove newline
if line[-1] == '\n':
line = line[:-1]
# Remove comments
line = re.sub(r"%.*$", "", line)
# Read counter and category change (max 1 per line)
re_set_counter_match = re.search(r"\\setcounter{songnum}{(\d+)}", line)
if re_set_counter_match is not None:
self.index = int(re_set_counter_match.group(1))
re_chapter_match = re.search(r"\\songchapter{(.*?)}", line)
if re_chapter_match is not None:
self.category = re_chapter_match.group(1)
self.categories.append(self.category)
# Traverse into \input commands if path starts w/ 'canciones/'
re_input_match = re.search(r"\\input{(.*?)}", line)
if re_input_match is not None:
input_file = join(str(Path(latex_file).parent), re_input_match.group(1))
if not input_file.endswith(".tex"):
input_file += ".tex"
self.scan_song_file(input_file)
def scan_others(self, folder: str, start_index: int) -> None:
'''Looks for songs not found during self.scan(...).
:param folder: A folder to scan through, looking for .tex files.
:param start_index: When numbering new songs, the first index to use.
'''
self.index = start_index
self.category = "Nuevas"
self.categories.append(self.category)
files_scanned = [s.latex_file for s in self.songs]
files_to_scan = [os.path.join(root, name) for root, dirs, files in os.walk(folder, topdown=False) for name in
files]
files_to_scan = [f for f in files_to_scan if f.endswith(".tex") and f[f.index('/') + 1:] not in files_scanned]
files_to_scan = sorted(files_to_scan)
for f in files_to_scan:
print("Scanning extra file", f)
self.scan_song_file(f)
def scan_song_file(self, song_file: str) -> None:
'''Scan a single song file and store any songs found.'''
# Variables
ignore: bool = False
current_song: Song | None = None
current_verse: Verse | None = None
memory: str | None = None
memorizing: bool = False
replay_index: int = 0
transpose: int = 0
trfmt: str = "normal"
# General behaviour: read the file and scan line-by-line
# In each line, read char-by-char, searching for common LaTeX commands
# Apply the effects of these commands, and add each verse (line inside
# song after removing all commands) to build up each song.
# Commands include:
### Comments: % Something something -> Ignored
### Line break locations: \brk -> Ignored
### Transpose: \transpose{SEMITONES}
### Begin song: \beginsong{NAME}[METADATA]
### End song: \endsong
### Verse begin/end: \beginverse, \endverse
### Chorus begin/end: \beginchorus, \endchorus
### Capo: \capo{FRET}
### Chord-excl. txt.: \ifchorded, \else, \fi
### (the contents between \else and \fi are discarded)
### Echoes: \echo{TEXT}
### Chord: \[CHORD]
### Chord repetition: \^
### Music repetition: \lrep, \rrep
### Lyric repetition: \rep{TIMES}
### Chord memory: \memorize, \replay
### Transpose format: \renewcommand{\trchordformat}[2]{.*}
### Other unrecognized commands: \NAME([ARG]|{ARG})*
for line in open(song_file, "r").readlines():
# Remove newline
if line[-1] == '\n':
line = line[:-1]
# Remove comments and \brk commands
text = re.sub(r"%.*$", "", line)
text = re.sub(r"\\brk({})?", '', text)
text = re.sub(r"``", u"\u201C", text)
text = re.sub(r"''", u"\u201D", text)
text = re.sub(r"`", u"\u2018", text)
text = re.sub(r"'", u"\u2019", text)
extras = {}
i = 0
while i <= len(text):
beginning = text[:i]
remain = text[i:]
if re.match(r"\\fi", remain):
ignore = False
text = beginning + text[i + len("\\fi"):]
continue
if ignore:
i += 1
continue
# Command lookup
if re_transpose_match := re.match(r"\\transpose *?{(-?\d+?)}", remain):
text = beginning + text[i + len(re_transpose_match.group(0)):]
transpose = int(re_transpose_match.group(1))
continue
if re_song_begin_match := re.match(r"\\beginsong *?{(.*?)}(\[.*?])?", remain):
text = beginning + text[i + len(re_song_begin_match.group(0)):]
if current_song is not None:
print("error end-begin song! %s at %s" % (line, song_file))
self.songs.append(current_song)
self.index += 1
current_song = Song(re_song_begin_match.group(1), self.index,
author=read_property(re_song_begin_match.group(2), "by"),
origin=read_property(re_song_begin_match.group(2), "m"),
category=self.category,
latex_file=song_file[song_file.index('/') + 1:])
transpose = 0
trfmt = "normal"
memory = None
memorizing = False
replay_index = 0
if hasattr(self, "audio_dir"):
for a in find_audios(self.index, self.audio_dir):
current_song.add_audio(a)
continue
if re.match(r"\\endsong", remain):
text = beginning + text[i + len("\\endsong"):]
self.songs.append(current_song)
current_song = None
self.index += 1
continue
if re_verse_cmd_match := re.match(r"\\(begin|end)(verse|chorus)", remain):
text = beginning + text[i + len(re_verse_cmd_match.group(0)):]
is_chorus = re_verse_cmd_match.group(2) == "chorus"
if current_song is None:
print("verse %s found outside song in %s" % (line, song_file))
if re_verse_cmd_match.group(1) == "begin":
if current_verse is not None:
print("error end-begin verse! %s at %s" % (line, song_file))
current_song.add_verse(current_verse)
if not is_chorus and memory is None:
memory = []
memorizing = True
replay_index = 0
current_verse = Verse(is_chorus)
else: # end of verse/chorus
if current_verse.is_chorus != is_chorus:
print("ended chorus-verse with wrong command?")
memorizing = False
current_song.add_verse(current_verse)
current_verse = None
continue
if (re_capo_match := re.match(r"\\capo{(\d+?)}", remain)) and current_song:
text = beginning + text[i + len(re_capo_match.group(0)):]
current_song.set_capo(int(re_capo_match.group(1)))
continue
if re.match(r"\\ifchorded", remain):
text = beginning + text[i + len("\\ifchorded"):]
continue
if re.match(r"\\else", remain):
ignore = True
text = beginning + text[i + len("\\else"):]
continue
if re_echo_match := re.match(r"\\echo[ \t]*?{((.|{.*?})*?)}", remain):
text = beginning + re_echo_match.group(1) + "\\echoend" + text[i + len(re_echo_match.group(0)):]
extra_put(extras, i, "echo")
continue
if re.match(r"\\echoend", remain):
text = beginning + text[i + len("\\echoend"):]
extra_put(extras, i, "echo")
continue
if re_chord_match := re.match(r"\\\[(.+?)]", remain):
text = beginning + text[i + len(re_chord_match.group(0)):]
c = Chord(re_chord_match.group(1), transpose, trfmt)
extra_put(extras, i, "chord", c)
if memorizing:
memory.append(c)
continue
if re.match(r"\^", remain):
text = beginning + text[i + len("^"):]
if memory is not None and replay_index < len(memory):
extra_put(extras, i, "chord", memory[replay_index])
replay_index += 1
continue
if re_dir_rep_match := re.match(r"\\([lr]rep)", remain):
text = beginning + text[i + len(re_dir_rep_match.group(0)):]
extra_put(extras, i, "dir-rep", re_dir_rep_match.group(1))
continue
if re_rep_match := re.match(r"\\rep{(\d+?)}", remain):
text = beginning + text[i + len(re_rep_match.group(0)):]
extra_put(extras, i, 'rep', int(re_rep_match.group(1)))
continue
if re.match(r"\\memorize", remain):
text = beginning + text[i + len("\\memorize"):]
memory = []
memorizing = True
continue
if re.match(r"\\replay", remain):
text = beginning + text[i + len("\\replay"):]
replay_index = 0
continue
# Double or single transpose mode
if re_trfmt := re.match(r"\\renewcommand{\\trchordformat}\[2\]{\\vbox{\\hbox{#1}\\hbox{#2}}}", remain):
text = beginning + text[i + len(re_trfmt.group(0)):]
trfmt = "double"
continue
if re_trfmt := re.match(r"\\renewcommand{\\trchordformat}\[2\]{\\vbox{\\hbox{#1}\\hbox{}}}", remain):
text = beginning + text[i + len(re_trfmt.group(0)):]
trfmt = "hover"
continue
if re_trfmt := re.match(r"\\renewcommand{\\trchordformat}\[2\]{\\hbox{#2}}", remain):
text = beginning + text[i + len(re_trfmt.group(0)):]
trfmt = "normal"
continue
# Command lookup end, removing any unrecognized command
re_macro_match = re.match(r"\\([^ \t{\[]+)[ \t]*?({.*?}|\[.*?])*", remain)
if re_macro_match:
text = beginning + text[i + len(re_macro_match.group(0)):]
print("Removed an unrecognized command:", re_macro_match.group(0))
continue
i += 1
if not current_verse and text.strip() != '':
print("l outside v:", text)
continue
if ignore or text.strip() == '':
continue
current_verse.add_line(Line(text, extras))
def sort_categories(self) -> dict[str, list[Song]]:
'''Returns a dictionary of categories to lists of songs (sorted by number).'''
result = {}
for c in self.categories:
result[c] = sorted([s for s in self.songs if s.category == c],
key=lambda s: s.number)
return result
def print_index(self, index_file, dj_engine):
context = Context({'sorted_categories': self.sort_categories()})
html = dj_engine.get_template("index.html").render(context)
with open(index_file, 'w') as f:
f.write(html)
@staticmethod
def print_song(song, directory, dj_engine):
context = Context({'song': song})
num_dir = join(directory, "%03d" % song.number)
mkdir(num_dir)
with open(join(num_dir, "index.html"), 'w') as f:
f.write(dj_engine.get_template("song_redir.html").render(context))
song_dir = join(directory, song.url())
mkdir(song_dir)
with open(join(song_dir, "index.html"), 'w') as f:
f.write(dj_engine.get_template("song.html").render(context))
def generate_html(self, output_dir, dj_engine):
mkdir(output_dir)
for song in self.songs:
self.print_song(song, output_dir, dj_engine)
self.print_index(join(output_dir, "index.html"), dj_engine)
def create_argparser():
    '''Builds the command-line parser of the scanner.

    Every option takes exactly one value (nargs=1), so each parsed attribute
    is a one-element list; the defaults are one-element lists as well.
    '''
    # (flag, keyword arguments) for each option, in the order they are shown
    option_specs = (
        ("--latex", {
            "required": True,
            "help": "The main LaTeX file. It may include other documents",
        }),
        ("--other-latex", {
            "required": False,
            "default": [None],
            "help": "A folder with songs, those not referenced in the main file will be included at the end.",
        }),
        ("--other-index", {
            "required": False,
            "type": int,
            "default": [400],
            "help": "The first song number for songs outside the main songbook.",
        }),
        ("--audios", {
            "required": False,
            "default": [None],
            "help": "The folder containing the audio files.",
        }),
        ("--output-dir", {
            "required": False,
            "default": ["public"],
        }),
    )
    cli = argparse.ArgumentParser()
    for flag, options in option_specs:
        cli.add_argument(flag, nargs=1, **options)
    return cli
# Script entry point: scan the songbook and render it to static HTML pages.
if __name__ == '__main__':
    args = create_argparser().parse_args()
    # Every option is declared with nargs=1, so each value is a one-element list
    loader = SongLoader(args.latex[0], args.audios[0])
    # --other-latex defaults to [None]; that list is always truthy, so the
    # element itself has to be tested to know whether a folder was given
    other_latex_dir = args.other_latex[0]
    if other_latex_dir:
        loader.scan_others(other_latex_dir, int(args.other_index[0]))
    # Django must be configured before its template engine can be used standalone
    settings.configure(USE_TZ=False, USE_I18N=False)
    e = Engine(dirs=["res/templates/"])
    loader.generate_html(args.output_dir[0], e)