# Created with "Python application".
# Requirements: two third-party packages must be installed: BeautifulSoup4 and requests.
# -*- coding: shift-jis -*-
from bs4 import BeautifulSoup
import requests
import time
import html
def getl(name):
    """Fetch the lyrics of a song from the hmiku atwiki.

    The song page is resolved through get_url(), downloaded, and the text of
    every element between the h3 heading containing '歌詞' (lyrics) and the
    h3 heading containing 'コメント' (comments) or '関連動画' (related videos)
    is concatenated.

    Args:
        name: Song title to look up.

    Returns:
        The collected lyrics text, or None (after printing a failure
        message) when get_url() returns its "null" sentinel.
    """
    url = get_url(name)
    if url == "null":
        print("Failed...(Song does not exist...)")
        return None

    # Pause between the search request made by get_url() and the page request.
    time.sleep(1)
    # A timeout keeps the prompt from hanging forever on a stalled connection.
    res = requests.get(url, timeout=10)
    soup = BeautifulSoup(res.content, 'html.parser')
    elems = soup.find_all()

    # Find out where the lyrics start. If no such heading exists the scan
    # falls back to the top of the page (index 0), as before.
    start = next(
        (i for i, elem in enumerate(elems)
         if elem.name == "h3" and '歌詞' in elem.text),
        0,
    )

    # Read the lyrics.
    parts = []
    for elem in elems[start + 1:]:
        text = elem.text
        # Elements with almost no text are skipped.
        if len(text) < 3:
            continue
        # Occasionally an element opening in a new tab (target="_blank")
        # shows up inside the lyrics section; skip it.
        if elem.get("target") == "_blank":
            continue
        # Ends when a comment or related video (h3 tag) arrives.
        if elem.name == "h3" and ('コメント' in text or '関連動画' in text):
            break
        # Reduce line breaks: collapse every run of newlines to a single one.
        # (Two chained replace() calls left doubles for runs of five or more.)
        while "\n\n" in text:
            text = text.replace("\n\n", "\n")
        parts.append(text)
    return "".join(parts)
# Get the page URL.
def get_url(name):
    """Look up the hmiku atwiki page URL for a song title.

    Searches the wiki's page list for `name` and picks the matching entry.
    If pages titled "name/..." exist and more than one title starts with
    `name`, the user is prompted on stdin for the VocaloidP to narrow the
    match. Otherwise a standalone page titled "name (" is looked for.

    Args:
        name: Song title to search for.

    Returns:
        "https:" followed by the quoted attribute value that immediately
        precedes the matching title attribute (the href), or the string
        "null" when no matching page is found.
    """
    # Let requests URL-encode the keyword so titles containing '&', '#',
    # '+' and the like do not corrupt the query string.
    res = requests.get(
        "https://w.atwiki.jp/hmiku/list",
        params={"keyword": name},
        timeout=10,
    )
    prefix = 'title="' + name
    # Exclude CD and archives.
    txt = (
        html.unescape(res.text)
        .replace(prefix + "/CD", "")
        .replace(prefix + "/過去ログ", "")
    )

    # Check if the song name exists with a "/..." suffix. If it does, there
    # may be a duplicate song name.
    if txt.find(prefix + "/") != -1:
        index = txt.find(prefix)
        # Check if songs overlap after the first hit position.
        if txt.find(prefix, index + 1) != -1:
            print("There are duplicate song titles. Please narrow down your search by VocaloidP.")
            name2 = input("VocaloidP:")
            index = txt.find(prefix + '/' + name2)
    else:
        # No disambiguated page above, so check for a standalone page whose
        # title is the song title followed by " (".
        index = txt.find(prefix + " (")

    # Covers both a missing standalone page and a failed VocaloidP narrowing;
    # scanning backwards from -1 would otherwise wrap around the page text.
    if index == -1:
        return "null"

    # Bring in the href: the quoted value right before the title attribute.
    end = txt.rfind('"', 0, index)
    begin = txt.rfind('"', 0, end) if end != -1 else -1
    if begin == -1:
        return "null"
    # The value is prefixed with the scheme; presumably the page uses
    # protocol-relative links ("//...") - confirm against the live markup.
    return "https:" + txt[begin + 1:end]
# Interactive prompt: keep asking for song names and print their lyrics
# until the user enters a backslash followed by "q".
while True:
    song_name = input("Song name:")
    # "\\q" is the two characters backslash + q; the bare "\q" spelling is an
    # invalid escape sequence that newer Python versions warn about.
    if song_name == "\\q":
        break
    lyrics = getl(song_name)
    # getl() has already printed a failure message when it returns None,
    # so do not print a literal "None" between the separators.
    if lyrics is None:
        continue
    print("-----------")
    print(lyrics)
    print("-----------")