Користувач:MonxBot/скрипти
Обидва боти написані на базі вбудованого тестового basic.py.
fixrefs.py ред.
Скрипт, що знаходить теги ref з однаковою назвою, але різним вмістом, та попарно дає користувачеві вибір очистити вміст одного з тегів, допоки всі подібні проблеми на сторінці не буде виправлено.
from __future__ import annotations
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import (
AutomaticTWSummaryBot,
ConfigParserBot,
ExistingPageBot,
SingleSiteBot,
)
import wikitextparser as wtp
import re
import difflib
# This is required for the text that is shown when you run this script
# with the parameter -help.
# The key is the placeholder that gets substituted in the help text. The
# previous key '¶ms;' was an HTML-entity mangling of '&params;' ('&para'
# rendered as a pilcrow), so the placeholder could never match.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816
class BasicBot(
    # Refer to pywikibot.bot for generic bot classes
    SingleSiteBot,  # A bot only working on one site
    ConfigParserBot,  # A bot which reads options from scripts.ini setting file
    # CurrentPageBot,  # Sets 'current_page'. Process it in treat_page method.
    #                  # Not needed here because we have subclasses
    ExistingPageBot,  # CurrentPageBot which only treats existing pages
    AutomaticTWSummaryBot,  # Automatically defines summary; needs summary_key
):

    """Interactively resolve named <ref> tags defined with differing content.

    For every pair of <ref> tags on the page that share a ``name`` attribute
    but carry different non-empty contents, the user is asked which one to
    keep; the other one is turned into a self-closing ``<ref name="..."/>``.
    """

    use_redirects = False  # treats non-redirects only
    summary_key = 'basic-changing'

    # 'replace', 'text' and 'top' are inherited from the basic.py template
    # and are not read by treat_page; they stay so existing command lines
    # and scripts.ini entries keep working.
    update_options = {
        'replace': False,  # delete old text and write the new text
        'summary': None,  # your own bot summary
        'text': 'Test',  # add this text from option. 'Test' is default
        'top': False,  # append text on top of the page
    }

    @staticmethod
    def _self_closing(tag: wtp.Tag) -> str:
        """Return the source of *tag* rewritten as a self-closing tag.

        ``<ref name="x">content</ref>`` becomes ``<ref name="x"/>``. If the
        expected ``>content</`` sequence is not found, the tag source is
        returned unchanged.
        """
        source = tag.string
        # Split at the boundary between the start tag and the contents;
        # everything before it is the start tag without its closing '>'.
        head, found, _ = source.rpartition(f'>{tag.contents}</')
        return f'{head}/>' if found else source

    @staticmethod
    def _ask_choice(tag1: wtp.Tag, tag2: wtp.Tag) -> str:
        """Read the user's decision for a conflicting pair of tags.

        Keeps reading from standard input until "1", "2" or "3" is entered.
        Entering "4" prints a line diff of the two tags' contents and then
        waits for another answer.

        :return: "1" (keep tag 1), "2" (keep tag 2) or "3" (skip)
        """
        while True:
            answer = input()
            if answer in ("1", "2", "3"):
                return answer
            if answer == "4":
                differ = difflib.Differ()
                for line in differ.compare(
                    tag1.contents.splitlines(keepends=True),
                    tag2.contents.splitlines(keepends=True),
                ):
                    print(" ")
                    print(line, end="")

    def treat_page(self) -> None:
        """Load the given page, resolve ref conflicts, and save it."""
        text = self.current_page.text
        print(self.current_page.extract(lines=2))

        # name -> the definition currently regarded as the one to keep
        kept = {}
        for tag in wtp.parse(text).get_tags(name="ref"):
            if "name" not in tag.attrs:
                continue
            # An empty tag is a reuse, not a definition. It must not be
            # stored either: otherwise a reuse placed before the first
            # definition would hide every later conflict for that name.
            if not tag.contents:
                continue
            name = tag.attrs["name"]
            if name not in kept:
                kept[name] = tag
                continue
            tag1 = kept[name]
            tag2 = tag
            if tag2.contents == tag1.contents:
                continue
            # An identical copy of this tag was already emptied by an
            # earlier decision; there is nothing left to ask about.
            if tag2.string not in text:
                continue

            print("Tag conflict!")
            print(f'Tag 1:\n {tag1}')
            print(f'Tag 2:\n {tag2}')
            print("Pick one: (3 - skip, 4 - diff)")
            choice = self._ask_choice(tag1, tag2)
            # Replace the whole tag source rather than just its contents, so
            # that refs with another name (or no name) that merely happen to
            # have the same contents are left untouched.
            if choice == "1":
                text = text.replace(tag2.string, self._self_closing(tag2))
            elif choice == "2":
                text = text.replace(tag1.string, self._self_closing(tag1))
                kept[name] = tag2

        # if summary option is None, it takes the default i18n summary from
        # i18n subdirectory with summary_key as summary key.
        self.put_current(text, summary=self.opt.summary)
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    options = {}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    # Process pagegenerators arguments
    local_args = gen_factory.handle_args(local_args)

    # Parse your own command line arguments
    for arg in local_args:
        arg, _, value = arg.partition(':')
        option = arg[1:]
        if option in ('summary', 'text'):
            if not value:
                # Keep the answer: previously the prompt result was thrown
                # away and the option was stored as an empty string.
                value = pywikibot.input('Please enter a value for ' + arg)
            options[option] = value
        # take the remaining options as booleans.
        # You will get a hint if they aren't pre-defined in your bot class
        else:
            options[option] = True

    # The preloading option is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = gen_factory.getCombinedGenerator(preload=True)

    # check if further help is needed
    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        # pass generator and private options to the bot
        bot = BasicBot(generator=gen, **options)
        bot.run()  # guess what it does


if __name__ == '__main__':
    main()
transrefs.py ред.
Скрипт, що знаходить ref-цитування, в яких не визначений текст (що видає відповідну помилку), та шукає однойменні цитування в інших вікі (в коді прописано, щоб шукало їх тільки в англійській та російській вікі, проте нема технічних обмежень перевіряти взагалі всі, це лиш одна умова). Як і fixrefs.py, є напівавтоматичним, тобто користувач сам вирішує, чи робити заміну для кожного з випадків.
Планую незабаром зробити так, щоб програма також перевіряла теги, задані у шаблоні reflist.
# Окрема подяка Vahurzpu, що допоміг розібратись, як отримати доступ до іншомовних сторінок
#!/usr/bin/env python3
from __future__ import annotations
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import (
AutomaticTWSummaryBot,
ConfigParserBot,
ExistingPageBot,
SingleSiteBot,
)
import wikitextparser as wtp
import re
import difflib
# This is required for the text that is shown when you run this script
# with the parameter -help.
# The key is the placeholder that gets substituted in the help text. The
# previous key '¶ms;' was an HTML-entity mangling of '&params;' ('&para'
# rendered as a pilcrow), so the placeholder could never match.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816
class BasicBot(
    # Refer to pywikibot.bot for generic bot classes
    SingleSiteBot,  # A bot only working on one site
    ConfigParserBot,  # A bot which reads options from scripts.ini setting file
    # CurrentPageBot,  # Sets 'current_page'. Process it in treat_page method.
    #                  # Not needed here because we have subclasses
    ExistingPageBot,  # CurrentPageBot which only treats existing pages
    AutomaticTWSummaryBot,  # Automatically defines summary; needs summary_key
):

    """Fill undefined named <ref> tags from the same article on other wikis.

    A named ref is treated as undefined when no tag with that name on the
    page has contents. For each such ref the pages linked through the
    page's Wikidata item are searched for a ref with the same name, and the
    user confirms every replacement.
    """

    use_redirects = False  # treats non-redirects only
    summary_key = 'basic-changing'

    # 'replace', 'text', 'top' and 'tlang' are not read by treat_page; they
    # stay so existing command lines and scripts.ini entries keep working.
    update_options = {
        'replace': False,  # delete old text and write the new text
        'summary': 'Підстановка цитувань з іншомовної вікі',  # your own bot summary
        'text': 'Test',  # add this text from option. 'Test' is default
        'top': False,  # append text on top of the page
        'tlang': 'en',
    }

    # Language codes of the linked sites that are searched for definitions.
    source_langs = ("en", "ru")

    @staticmethod
    def _ref_name(tag: wtp.Tag) -> str:
        """Return the ``name`` attribute of *tag* with every "/" removed.

        The same normalisation is applied to local and foreign tags so that
        their names stay comparable.
        """
        # presumably needed because an unquoted self-closing tag such as
        # <ref name=x/> yields "x/" as the attribute value -- TODO confirm
        return tag.attrs["name"].replace("/", "")

    def _fill_from(self, text: str, foreign_text: str, empty_tags: dict) -> str:
        """Offer definitions found in *foreign_text* for the refs in *empty_tags*.

        :param text: wikitext of the page being edited
        :param foreign_text: wikitext of the linked page to search
        :param empty_tags: normalised name -> undefined local tag; accepted
            entries are removed from this dict in place
        :return: *text* with the accepted replacements applied
        """
        donors = {}
        for tag in wtp.parse(foreign_text).get_tags(name="ref"):
            if "name" in tag.attrs and tag.contents:
                donors[self._ref_name(tag)] = tag

        for name, donor in donors.items():
            if name not in empty_tags:
                continue
            print(f'Found content for {name}: {donor}')
            print("Press y to accept replacement, n to decline")
            while True:
                answer = input()
                if answer == "y":
                    # Drop the entry once it is filled; otherwise the next
                    # wiki would offer it again and the replacement would
                    # hit another use of the ref, defining it twice.
                    target = empty_tags.pop(name)
                    text = text.replace(target.string, donor.string, 1)
                    break
                if answer == "n":
                    break
        return text

    def treat_page(self) -> None:
        """Load the given page, fill undefined refs, and save it."""
        text = self.current_page.text
        summary = self.opt.summary

        # normalised name -> first tag with contents, or else the last
        # empty tag seen for that name
        refs_by_name = {}
        for tag in wtp.parse(text).get_tags(name="ref"):
            if "name" not in tag.attrs:
                continue
            name = self._ref_name(tag)
            known = refs_by_name.get(name)
            if known is not None and known.contents:
                continue
            refs_by_name[name] = tag

        empty_tags = {}
        for name, tag in refs_by_name.items():
            if not tag.contents:
                print(f'Empty value {name}!')
                empty_tags[name] = tag

        if empty_tags:
            try:
                item = pywikibot.ItemPage.fromPage(self.current_page)
                for iterlink in item.iterlinks():
                    if not empty_tags:
                        break  # everything is filled, skip remaining wikis
                    if iterlink.site.lang not in self.source_langs:
                        continue
                    print(f'Looking through {iterlink.site.lang}')
                    text = self._fill_from(text, iterlink.text, empty_tags)
            except pywikibot.exceptions.NoPageError:
                print("No page found; Likely no interwiki pages are linked.")

        # if summary option is None, it takes the default i18n summary from
        # i18n subdirectory with summary_key as summary key.
        self.put_current(text, summary=summary)
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    options = {}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    # Process pagegenerators arguments
    local_args = gen_factory.handle_args(local_args)

    # Parse your own command line arguments
    for arg in local_args:
        arg, _, value = arg.partition(':')
        option = arg[1:]
        if option in ('summary', 'text', 'tlang'):
            if not value:
                # Keep the answer: previously the prompt result was thrown
                # away and the option was stored as an empty string.
                value = pywikibot.input('Please enter a value for ' + arg)
            options[option] = value
        # take the remaining options as booleans.
        # You will get a hint if they aren't pre-defined in your bot class
        else:
            options[option] = True

    # The preloading option is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = gen_factory.getCombinedGenerator(preload=True)

    # check if further help is needed
    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        # pass generator and private options to the bot
        bot = BasicBot(generator=gen, **options)
        bot.run()  # guess what it does


if __name__ == '__main__':
    main()
rubook.py ред.
Змінює Шаблон:Книга на Шаблон:Книга-ру у випадках, якщо в Книзі використано російськомовні параметри (заглавие, издательство)
from __future__ import annotations
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import (
AutomaticTWSummaryBot,
ConfigParserBot,
ExistingPageBot,
SingleSiteBot,
)
import wikitextparser as wtp
import re
# This is required for the text that is shown when you run this script
# with the parameter -help.
# The key is the placeholder that gets substituted in the help text. The
# previous key '¶ms;' was an HTML-entity mangling of '&params;' ('&para'
# rendered as a pilcrow), so the placeholder could never match.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816
class BasicBot(
    # Refer to pywikibot.bot for generic bot classes
    SingleSiteBot,  # A bot only working on one site
    ConfigParserBot,  # A bot which reads options from scripts.ini setting file
    # CurrentPageBot,  # Sets 'current_page'. Process it in treat_page method.
    #                  # Not needed here because we have subclasses
    ExistingPageBot,  # CurrentPageBot which only treats existing pages
    AutomaticTWSummaryBot,  # Automatically defines summary; needs summary_key
):

    """Rename {{книга}} to {{книга-ру}} where it uses Russian parameter names."""

    use_redirects = False  # treats non-redirects only
    summary_key = 'basic-changing'

    # 'replace', 'text' and 'top' are inherited from the basic.py template
    # and are not read by treat_page; they stay so existing command lines
    # and scripts.ini entries keep working.
    update_options = {
        'replace': False,  # delete old text and write the new text
        'summary': "[[Шаблон:Книга]] -> [[Шаблон:Книга-ру|Книга-ру]]",  # your own bot summary
        'text': 'Test',  # add this text from option. 'Test' is default
        'top': False,  # append text on top of the page
    }

    # Parameter names whose presence marks a template as the Russian
    # variant. For supervised runs "издательство", "страниц", "место" and
    # "ссылка" can be added -- there are cases where editors change
    # "заглавие" to "Назва" but forget about the remaining parameters.
    russian_params = ("заглавие",)

    def treat_page(self) -> None:
        """Load the given page, rename matching templates, and save it."""
        parsed = wtp.parse(self.current_page.text)
        for template in parsed.templates:
            raw_name = template.name
            bare_name = raw_name.strip()
            if bare_name.lower() != "книга":
                continue
            argnames = [
                argument.name.lower().strip()
                for argument in template.arguments
            ]
            if not any(param in argnames for param in self.russian_params):
                continue
            if "назва" in argnames:
                # someone translated the template only partially
                print("warning: назва in argnames")
            # Keep the capitalisation of the first letter and the whitespace
            # or newline around the name, so the edit changes nothing but
            # the template name itself.
            first_letter = "К" if bare_name[0].isupper() else "к"
            new_name = first_letter + "нига-ру"
            template.name = raw_name.replace(bare_name, new_name, 1)

        # Serialise the parsed tree instead of string-replacing template
        # sources in the page text: only templates the parser recognised
        # are touched.
        self.put_current(parsed.string, summary=self.opt.summary)
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    options = {}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    # Process pagegenerators arguments
    local_args = gen_factory.handle_args(local_args)

    # Parse your own command line arguments
    for arg in local_args:
        arg, _, value = arg.partition(':')
        option = arg[1:]
        if option in ('summary', 'text'):
            if not value:
                # Keep the answer: previously the prompt result was thrown
                # away and the option was stored as an empty string.
                value = pywikibot.input('Please enter a value for ' + arg)
            options[option] = value
        # take the remaining options as booleans.
        # You will get a hint if they aren't pre-defined in your bot class
        else:
            options[option] = True

    # The preloading option is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = gen_factory.getCombinedGenerator(preload=True)

    # check if further help is needed
    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        # pass generator and private options to the bot
        bot = BasicBot(generator=gen, **options)
        bot.run()  # guess what it does


if __name__ == '__main__':
    main()