Python背单词记单词小程序,可自定义词库,支持多种记忆模式,根据词义拼写、选择单词,根据词意选择单词,可乱序抽查,可对错题进行反复抽查
完整程序源代码下载:Python背单词记单词小程序
词库生成代码
gen_old_tem8.py
import random
import time
import json
import tempfile
import sys
import os
import logging
import base64

import requests

from wordson.gen.gensource import GenSource

if sys.version_info[0] < 3:
    # Python 2: pick an ``open`` that accepts the ``encoding`` keyword.
    try:
        from io import open
    except ImportError:
        from codecs import open

try:
    FileNotFoundError
except NameError:
    # Python 2 has no FileNotFoundError; fall back to IOError.
    FileNotFoundError = IOError


class GenOldTEM8(GenSource):
    """Generate one day's word list from the legacy TEM-8 word file.

    The full word file is shuffled with a fixed seed, so every run sees
    the same order; ``practice_day`` selects a consecutive slice of
    ``WORDS_PER_DAY`` entries from that order.
    """

    SEED = 0
    # Number of words handed out per practice day.
    WORDS_PER_DAY = 20
    LOGGER = logging.getLogger('wordson.gen.gen_old_tem8.GenOldTEM8')

    def __init__(self, practice_day):
        """Select the slice for the 1-based ``practice_day``.

        The generated word set is named after today's local date.
        """
        name = time.strftime('%Y-%m-%d', time.localtime())
        super(GenOldTEM8, self).__init__('English', u'daily-tem8', name)
        self.start = (practice_day - 1) * self.WORDS_PER_DAY
        self.end = self.start + self.WORDS_PER_DAY

    def get_source(self):
        """Return an iterable of ``(word_id, spellings)`` for today's slice.

        Reads ``<WORDSDIR>/TEM8-old/tem8_all.json``, shuffles it with
        ``SEED`` and keeps entries ``start`` to ``end``.  The id of an entry
        is its spellings joined with ``'_'``; the selected entries are kept
        in ``self.old_source_id_to_info`` for the other ``get_*`` methods.
        """
        source_file = os.path.join(self.WORDSDIR, 'TEM8-old', 'tem8_all.json')
        with open(source_file, 'r', encoding='utf-8') as f:
            source_all = json.load(f)
        # Fixed seed: the shuffle order must be identical on every run so
        # that consecutive days get consecutive, non-overlapping slices.
        random.seed(self.SEED)
        random.shuffle(source_all)
        source_need = source_all[self.start: self.end]
        self.old_source_id_to_info = dict(
            ('_'.join(each['word']), each) for each in source_need)
        return ((key, value['word'])
                for key, value in self.old_source_id_to_info.items())

    def get_meaning(self, word_id):
        """Convert the stored ``(type, meaning)`` pairs of ``word_id``.

        Returns a list of ``{'type': [type] or None, 'meaning': [meaning]}``
        dicts, one per stored pair.
        """
        info = self.old_source_id_to_info[word_id]
        result = []
        for old_type, old_meaning in info['meaning']:
            new_type = None if old_type is None else [old_type]
            result.append({
                'type': new_type,
                'meaning': [old_meaning],
            })
        return result

    def get_pronunciation(self, word_id):
        """Return ``(self.ONE2ONE, [(sound_mark, wav_bytes), ...])``.

        Only the first spelling of ``word_id`` is looked up.  Both the
        scraped pronunciation info and the audio files are cached in the
        system temporary directory.

        Raises:
            requests.HTTPError: an mp3 download returned a non-2xx status.
        """
        words = self.old_source_id_to_info[word_id]['word']
        word = words[0]
        temp_dir = tempfile.gettempdir()
        pr_info = self._load_pronunciation_info(word, temp_dir)
        result = []
        for sound_mark, sound_url in pr_info.items():
            wav_bin = self._load_wav(word, sound_mark, sound_url, temp_dir)
            result.append((sound_mark, wav_bin))
        return self.ONE2ONE, result

    def _load_pronunciation_info(self, word, temp_dir):
        """Return the ``{sound_mark: mp3_url}`` dict for ``word``, cached as JSON."""
        info_file = os.path.join(temp_dir, word + '.json')
        try:
            with open(info_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (FileNotFoundError, IOError):
            # Cache miss: scrape the dictionary page and remember the result.
            self.LOGGER.info('get from merriam for %s', word)
            pr_info = self.pronunciation_from_merriam(word)
            with open(info_file, 'w', encoding='utf-8') as f:
                json.dump(pr_info, f)
            return pr_info

    def _load_wav(self, word, sound_mark, sound_url, temp_dir):
        """Return the wav bytes for ``sound_mark``, downloading/converting on a miss."""
        # NOTE(review): the sound mark is used verbatim as the cache file
        # name; a mark containing a path separator would presumably break
        # these paths -- confirm against real scraped data.
        sound_wav = os.path.join(temp_dir, sound_mark + '.wav')
        sound_mp3 = os.path.join(temp_dir, sound_mark + '.mp3')
        try:
            with open(sound_wav, 'rb') as f:
                return f.read()
        except (FileNotFoundError, IOError):
            # No converted wav cached yet; build it below.
            pass

        if not os.path.isfile(sound_mp3):
            self.LOGGER.info(
                'get from merriam for %s, mp3 %s', word, sound_url)
            resp = requests.get(sound_url)
            # Explicit check instead of ``assert``: asserts vanish under
            # ``python -O`` and an error page would be cached as the mp3.
            if not 200 <= resp.status_code < 300:
                raise requests.HTTPError(
                    'unexpected status %s for %s'
                    % (resp.status_code, sound_url),
                    response=resp)
            self.LOGGER.info('save mp3 to %s', sound_mp3)
            with open(sound_mp3, 'wb') as f:
                f.write(resp.content)

        self.LOGGER.info('conver %s to %s', sound_mp3, sound_wav)
        try:
            self.ffmpeg_convert(sound_mp3, sound_wav)
        except BaseException:
            # Remove a possibly half-written wav so the next run does not
            # treat it as a valid cache entry; BaseException so that an
            # interrupt during conversion is cleaned up as well.
            try:
                os.remove(sound_wav)
            except OSError:
                pass
            raise
        with open(sound_wav, 'rb') as f:
            return f.read()

    @staticmethod
    def str_b64(s):
        """Return the base64 text of the UTF-8 encoding of ``s``."""
        return base64.b64encode(s.encode('utf-8')).decode('utf-8')


if __name__ == '__main__':
    from wordson.bashlog import getlogger
    getlogger(None, logging.DEBUG)
    # Usage: gen_old_tem8.py <practice_day>
    _, day_str = sys.argv
    pract = int(day_str)
    saver = GenOldTEM8(pract)
    saver.run()
    saver.save()
gensource.py
import os
import sys
import logging
import subprocess
import json

from bs4 import BeautifulSoup
import requests

from wordson.util import PROJECTDIR

if sys.version_info[0] < 3:
    # Python 2: pick an ``open`` that accepts the ``encoding`` keyword.
    try:
        from io import open
    except ImportError:
        from codecs import open

try:
    FileExistsError
except NameError:
    # Python 2 has no FileExistsError; fall back to OSError.
    FileExistsError = OSError

try:
    FileNotFoundError
except NameError:
    # Python 2 has no FileNotFoundError; fall back to IOError.
    FileNotFoundError = IOError


class GenSource(object):
    """Base class for word-set generators.

    Subclasses provide ``get_source``, ``get_meaning`` and
    ``get_pronunciation``; ``run`` collects the word entries (writing the
    audio files as it goes) and ``save`` writes the word index and
    registers the set in ``<WORDSDIR>/index.json``.
    """

    LOGGER = logging.getLogger('wordson.gen.gensource.GenSource')
    WORDSDIR = os.path.join(PROJECTDIR, 'words')
    # Pronunciation modes returned as first item by ``get_pronunciation``.
    MIXED = 0
    ONE2ONE = 1

    def __init__(self, language, category, name):
        """Derive the output locations from ``language/category/name``."""
        self.language = language
        self.category = category
        self.name = name
        self.rel_folder = os.path.join(language, category, name)
        self.save_folder = os.path.join(self.WORDSDIR, self.rel_folder)
        self.save_file = os.path.join(self.save_folder, 'index.json')
        self.result = []

    def _ensure_save_folder(self):
        """Create ``save_folder`` if needed; re-raise real creation failures."""
        try:
            os.makedirs(self.save_folder)
        except OSError:
            # An already existing folder is fine; anything else (for
            # example missing permissions) must not be hidden.
            if not os.path.isdir(self.save_folder):
                raise

    def _write_wav(self, word_id, index, wav_bin):
        """Write one recording into ``save_folder`` and return its file name."""
        fname = '{}-{}.wav'.format(word_id, index)
        fpath = os.path.join(self.save_folder, fname)
        with open(fpath, 'wb') as f:
            f.write(wav_bin)
        return fname

    def _build_pronunciation(self, word_id, pr):
        """Store the recordings of ``pr`` and return its index entry.

        ``pr`` is the value returned by ``get_pronunciation``; ``None``
        yields ``None``.

        Raises:
            ValueError: the mode in ``pr[0]`` is neither MIXED nor ONE2ONE.
        """
        if pr is None:
            return None
        mode = pr[0]
        if mode == self.MIXED:
            _, sound_marks, wav_binaries = pr
            sound_fnames = [
                self._write_wav(word_id, index, wav_bin)
                for index, wav_bin in enumerate(wav_binaries)
            ]
            return {
                "mode": "mixed",
                "value": {
                    "soundmarks": sound_marks,
                    "soundpaths": sound_fnames
                }
            }
        if mode == self.ONE2ONE:
            _, marks_binaries_pair = pr
            value = []
            for index, (sound_mark, wav_bin) in enumerate(marks_binaries_pair):
                fname = self._write_wav(word_id, index, wav_bin)
                value.append({
                    "soundmark": sound_mark,
                    "soundpath": fname
                })
            return {
                "mode": "one2one",
                "value": value
            }
        # Explicit error instead of ``assert False`` so the check also
        # fires under ``python -O``.
        raise ValueError('unknown pronunciation mode: %r' % (mode,))

    def run(self):
        """Collect every word from ``get_source`` into ``self.result``.

        For each word the meaning and pronunciation are fetched, the
        recordings are written to ``save_folder`` and an entry with the
        keys ``word``, ``meaning`` and ``pronunciation`` is appended.
        """
        result = self.result
        self._ensure_save_folder()
        for word_id, word in self.get_source():
            meaning = self.get_meaning(word_id)
            pr = self.get_pronunciation(word_id)
            pronunciation = self._build_pronunciation(word_id, pr)
            word_info = {
                'word': word,
                'meaning': meaning,
                'pronunciation': pronunciation,
            }
            self.LOGGER.info(word_info)
            result.append(word_info)

    def save(self):
        """Write the collected words and register this set.

        ``self.result`` goes to ``save_file``; the description of this set
        is appended to ``<WORDSDIR>/index.json`` unless an identical entry
        is already listed there.
        """
        self._ensure_save_folder()
        with open(self.save_file, 'w', encoding='utf-8') as f:
            json.dump(self.result, f, indent=4, ensure_ascii=False)
        config = {
            'name': self.name,
            'language': self.language,
            'category': self.category,
            'folder': self.rel_folder,
            'words': 'index.json',
        }
        config_file = os.path.join(self.WORDSDIR, 'index.json')
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                config_full = json.load(f)
        except (FileNotFoundError, IOError):
            # No registry yet: start a new one.
            config_full = []
        # Do not register the same set twice.
        if config not in config_full:
            config_full.append(config)
        with open(config_file, 'w', encoding='utf-8') as f:
            json.dump(config_full, f, indent=4, ensure_ascii=False)

    def get_source(self):
        """Return an iterable of ``(word_id, spellings)`` pairs.

        To be provided by subclasses; this base version returns ``None``.

        e.g.
        {
            'word_id': ['spell1', 'spell2'],  # list, required
        }.items()
        """

    def get_meaning(self, word_id):
        """Return the meanings of ``word_id``.

        To be provided by subclasses; this base version returns ``None``.

        e.g.
        [
            {
                "type": ["n"],  # list, None when not available
                "meaning": ["<noun meaning>"]  # required, list
            },
            {
                "type": ["v"],
                "meaning": ["<verb meaning>"]
            }
        ]
        """

    def get_pronunciation(self, word_id):
        """Return the pronunciation of ``word_id``, or ``None`` for none.

        To be provided by subclasses; this base version returns ``None``.
        Two result shapes are understood by ``run``:

            return self.MIXED, ["soundmark1", "soundmark2"], \
                [<wav-binary>, <wav-binary>, <wav-binary>]

            return self.ONE2ONE, [
                ("soundmark1", <wav-binary>),
                ("soundmark2", <wav-binary>),
            ]
        """

    @staticmethod
    def pronunciation_from_merriam(word):
        """Scrape the Merriam-Webster page of ``word`` for pronunciations.

        Returns a dict mapping each sound mark (the stripped text of an
        element with class ``pr``) to the URL of its mp3 recording.

        Raises:
            requests.HTTPError: the page request returned a non-2xx status.
            ValueError: a pronunciation element has no audio link or the
                link lacks its ``data-file`` / ``data-dir`` attribute.
        """
        page_url = 'https://www.merriam-webster.com/dictionary/{}'.format(word)
        resp = requests.get(page_url)
        # Explicit checks instead of ``assert``: asserts vanish under
        # ``python -O``.
        if not 200 <= resp.status_code < 300:
            raise requests.HTTPError(
                'unexpected status %s for %s' % (resp.status_code, page_url),
                response=resp)
        soup = BeautifulSoup(resp.content, 'html5lib')
        result = {}
        for pr in soup.find_all(class_='pr'):
            sound_mark = pr.text.strip()
            # The audio link is looked up in the parent of the ``pr`` element.
            link = pr.parent.find('a')
            if link is None:
                raise ValueError(
                    'no audio link found for sound mark %r of %r'
                    % (sound_mark, word))
            a_data_file = link.get('data-file')
            a_data_dir = link.get('data-dir')
            if a_data_file is None or a_data_dir is None:
                raise ValueError(
                    'audio link for sound mark %r of %r lacks '
                    'data-file/data-dir' % (sound_mark, word))
            sound_url = 'https://media.merriam-webster.com/audio/prons/en/us/mp3/{}/{}.mp3'.format(a_data_dir, a_data_file)
            GenSource.LOGGER.debug('%s -> %s', sound_mark, sound_url)
            result[sound_mark] = sound_url
        return result

    @staticmethod
    def ffmpeg_convert(infile, outfile, ffmpeg='/usr/bin/ffmpeg'):
        """Convert ``infile`` to ``outfile`` with ffmpeg, overwriting it.

        Returns the result of ``subprocess.check_call``, which raises
        ``subprocess.CalledProcessError`` when ffmpeg exits non-zero.
        """
        cmd = [
            ffmpeg,
            '-y',
            '-i', infile,
            '-strict', '-2',
            outfile
        ]
        return subprocess.check_call(cmd)


if __name__ == '__main__':
    # Usage: gensource.py <word> -- print the scraped pronunciation URLs.
    resp = GenSource.pronunciation_from_merriam(sys.argv[1])
    print(resp)
完整程序源代码下载:Python背单词记单词小程序