Користувач:BunykBot/proteins.py

import re

from pywikibot import Page, Site, pagegenerators
import mwparserfromhell

from remove_spam import update_page

def shorten_page(page):
    """Replace the wikilinked amino-acid sequence on *page* with plain letters.

    Parses ``page.text`` and looks for the first ``hidden`` template whose
    ``header`` parameter equals 'послідовність амінокислот' (compared
    case-insensitively, surrounding whitespace ignored) and which also has a
    ``content`` parameter. That parameter's value is replaced with the output
    of ``shorten_aminoacids``; later templates are not examined.

    The resulting wikitext is always handed to ``update_page`` together with
    the edit summary, whether or not a matching template was found.

    Args:
        page: object exposing its wikitext as ``page.text``; it is forwarded
            unchanged to ``update_page``.
    """
    code = mwparserfromhell.parse(page.text)
    for t in code.ifilter_templates():
        if t.name.strip().lower() != 'hidden':
            continue
        try:
            header = t.get('header').value.strip().lower()
            content = t.get('content')
        except ValueError:
            # Template.get raises ValueError for an absent parameter; a
            # template lacking either one cannot hold the sequence, so skip
            # it instead of aborting the whole run.
            continue
        if header != 'послідовність амінокислот':
            continue
        content.value = shorten_aminoacids(content.value)
        # Only the first matching template is rewritten.
        break

    update_page(page, str(code), 'Розвікіфікація послідовності амінокислот')

# Matches a piped wikilink whose label is a single word character, e.g.
# ``[[Аланін|A]]``: group 1 is the link target, group 2 the one-letter label.
# The target excludes brackets, pipes and newlines so a match can never begin
# at an earlier, unrelated ``[[...]]`` link on the same line and swallow the
# text in between.
ACID_PATTERN = re.compile(r'\[\[([^\[\]|\n]+)\|(\w)\]\]')


def shorten_aminoacids(text):
    """Collapse one-letter piped wikilinks to bare letters and append a legend.

    Every ``[[target|X]]`` link (``X`` being a single word character) is
    replaced by ``X``. A ``<div>`` legend is then appended, listing each
    distinct (letter, target) pair once as ``X: [[target]]``, ordered by
    letter and then by target so the output is reproducible between runs.

    Args:
        text: wikitext to process; it is converted with ``str()`` first, so
            any object with a string representation is accepted.

    Returns:
        The rewritten text as ``str``. If no matching link is present, the
        string form of *text* is returned unchanged and no legend is added.
    """
    text = str(text)
    acids = ACID_PATTERN.findall(text)
    if not acids:
        return text

    # Deduplicate, then sort on (letter, target): sorting on the letter alone
    # would leave ties in set-iteration order, which varies with hash seeding.
    legend_entries = sorted(set(acids), key=lambda pair: (pair[1], pair[0]))
    legend = '<br />\n'.join(
        f'{code}: [[{name}]]' for name, code in legend_entries
    )

    return ''.join((
        ACID_PATTERN.sub(r'\2', text),
        '\n<div style="font-family:monospace; width:30em; column-count: 3">\n',
        legend,
        '</div>',
    ))

if __name__ == "__main__":
    # Search page sources in namespace 0 for the template parameter
    # assignment that introduces the amino-acid sequence block, then
    # process every page the search yields.
    needle = 'header=Послідовність амінокислот'
    query = r'insource:/%s/' % needle
    matches = pagegenerators.SearchPageGenerator(query, namespaces=[0])
    for page in matches:
        shorten_page(page)