import re
from collections.abc import Iterable, Sequence
from typing import Any

import aiosqlite

from ter.config import Settings

settings = Settings()


class Database:
    """Thin async wrapper around a single ``aiosqlite`` connection.

    No connection is held until :meth:`connect` has been awaited, nor after
    :meth:`disconnect`.
    """

    # Accepted shapes for SQL parameters: positional sequence or named mapping.
    Params = Sequence[Any] | dict[str, Any]

    def __init__(self, uri: str) -> None:
        self._uri: str = uri
        self._connection: aiosqlite.Connection | None = None

    async def connect(self) -> None:
        """Open the connection; the stored string is passed as an SQLite URI."""
        self._connection = await aiosqlite.connect(self._uri, uri=True)

    async def disconnect(self) -> None:
        """Close the connection if one is open; otherwise do nothing.

        Safe to call before ``connect()`` or more than once.
        """
        # Detach the handle first so the instance never keeps a reference to
        # a connection whose close() was attempted, even if close() raises.
        connection, self._connection = self._connection, None
        if connection is not None:
            await connection.close()

    async def execute(self, sql: str, params: Params = ()) -> aiosqlite.Cursor:
        """Run one SQL statement and return the resulting cursor.

        ``connect()`` must have been awaited beforehand.
        """
        return await self._connection.execute(sql, params)

    async def executemany(
        self, sql: str, params: Iterable[Params]
    ) -> aiosqlite.Cursor:
        """Run one SQL statement once per parameter set in ``params``.

        ``connect()`` must have been awaited beforehand.
        """
        return await self._connection.executemany(sql, params)


database = Database(settings.SQLITE_URI)

# Folding table indexed by code point (0-255); ``str.translate`` looks up
# ``table[ord(ch)]``. ASCII and accented Latin-1 letters become their
# lower-case ASCII base letter and digits are kept. Latin-1 letters with no
# single-letter base (AE, ETH, THORN, sharp s) become "_", and every other
# code point -- including ASCII "_" -- becomes "-", which acts as a token
# separator.
_TOKENIZE_TRANSLATE_TABLE = (
    "------------------------------------------------0123456789------"
    "-abcdefghijklmnopqrstuvwxyz------abcdefghijklmnopqrstuvwxyz-----"
    "----------------------------------------------------------------"
    "aaaaaa_ceeeeiiii_nooooo-ouuuuy__aaaaaa_ceeeeiiii_nooooo-ouuuuy_y"
)

# Compiled once at import time rather than looked up on every call.
_TOKEN_RE = re.compile(r"\w+")


def tokenize(text: str) -> list[tuple[str, int]]:
    """Break a string into lower-case ASCII tokens, removing diacritics.

    Each character is folded through ``_TOKENIZE_TRANSLATE_TABLE`` and the
    result is split into maximal runs of word characters.

    Returns:
        A list of ``(token, position)`` pairs, where ``position`` is the
        index of the token's first character. Folding maps each character
        to exactly one character, so positions are also valid offsets into
        the original ``text``.

    Raises:
        ValueError: If ``text`` contains a character above U+00FF, which the
            256-entry table cannot fold.
    """
    if any(ord(c) > 255 for c in text):
        raise ValueError(text)
    folded = text.translate(_TOKENIZE_TRANSLATE_TABLE)
    return [(m.group(0), m.start(0)) for m in _TOKEN_RE.finditer(folded)]