#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import re
import sys
import time
import json
import base64
import fitz
import requests
from bs4 import BeautifulSoup
import numpy as np
import easygui as EG
import pyzbar.pyzbar as pyzbar
from hashlib import md5
from PIL import Image, ImageFont, ImageDraw
from openpyxl import load_workbook
from selenium import webdriver
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.ocr.v20181119 import ocr_client, models
# ---------------------------------------------------------------------------
# Fixed Windows locations used by the tool. Everything lives under
# C:\xzhou\Invoice; the paths are absolute, so the script only works on a
# machine where this tree exists.
# ---------------------------------------------------------------------------

# Root folder, the input/working folder (PDF/PNG/JPG files are read from
# xzhouP2P) and the folder holding configuration assets.
Invoice = r'C:\xzhou\Invoice'
xzhouP2P = r'C:\xzhou\Invoice\PDF2PNG'
Documents = r'C:\xzhou\Invoice\Documents'
# xzhouYZM and xzhouTEST are not referenced anywhere else in this file.
xzhouYZM = r'C:\xzhou\Invoice\PDF2PNG\yzm'
xzhouTEST = r'C:\xzhou\Invoice\PDF2PNG\test'
# MyInvoice: result screenshots (one timestamped sub-folder per run).
# Completed: source images whose verification succeeded.
# PleaseCheck: source images that could not be recognised (see CheckPNG).
MyInvoice = r'C:\xzhou\Invoice\PDF2PNG\MyInvoice'
Completed = r'C:\xzhou\Invoice\PDF2PNG\Completed'
PleaseCheck = r'C:\xzhou\Invoice\PDF2PNG\PleaseCheck'
# Captcha folders: raw captcha image, banner+captcha composite sent to the
# recognition service, and composites whose answer was rejected by the site.
xzhouGetYZM = r'C:\xzhou\Invoice\PDF2PNG\yzm\yanzhengma'
xzhouSetYZM = r'C:\xzhou\Invoice\PDF2PNG\yzm\chaojiying'
ReportError = r'C:\xzhou\Invoice\PDF2PNG\yzm\ReportError'
# Browser driver, the script injected into every page, the result workbook and
# the comma-separated list of invoice numbers that were already verified.
chrome_driver = r'C:\xzhou\Invoice\Documents\chromedriver.exe'
MyStealthJS = r'C:\xzhou\Invoice\Documents\MyStealth.min.js'
Automation = r'C:\xzhou\Invoice\Documents\Automation.xlsx'
fphmList = r'C:\xzhou\Invoice\Documents\fphmList.txt'
# Prompt banner images produced by FONT2PNG() and stacked on top of the
# captcha by FontYZM().
Red = r'C:\xzhou\Invoice\Documents\BgRed.png'
Yellow = r'C:\xzhou\Invoice\Documents\BgYellow.png'
Blue = r'C:\xzhou\Invoice\Documents\BgBlue.png'
Black = r'C:\xzhou\Invoice\Documents\BgBlack.png'
# ---------------------------------------------------------------------------
# Import-time setup: everything below runs as soon as the module is loaded,
# including a network request and the launch of a Chrome process.
# ---------------------------------------------------------------------------

# Fetch the settings page and take the text of its first <code> element.
# NOTE(review): 'xzhou.html' carries no URL scheme, which requests rejects as
# written; presumably a placeholder for the real settings URL - confirm.
res = requests.get(r'xzhou.html').content
soup = BeautifulSoup(res, 'html.parser')
xzhou = soup.code.text.strip()
# SECURITY NOTE(review): eval() executes whatever expression the downloaded
# page contains. If the settings are a plain dict literal, ast.literal_eval
# would parse them without running arbitrary code - confirm and replace.
xzhou_options = eval(xzhou)
# Individual settings; a missing key raises KeyError right here.
WinWait = xzhou_options['options_WinWait']
user_name = xzhou_options['options_username']
pass_word = xzhou_options['options_password']
getyamid = xzhou_options['options_getyzmid']
Invoice_URL = xzhou_options['options_InvoiceURL']
window_size = xzhou_options['options_window-size']
winkiosk = xzhou_options['options_winkiosk']
headless = xzhou_options['options_headless']
SecretId = xzhou_options['options_SecretId']
SecretKey = xzhou_options['options_SecretKey']
# Chrome command line: the first three switches come verbatim from the
# downloaded settings, the remaining ones are fixed.
options = webdriver.ChromeOptions()
options.add_argument(headless)
options.add_argument(window_size)
options.add_argument(winkiosk)
options.add_argument('--no-sandbox')
options.add_argument('--disable-gpu')
options.add_argument('--enable-webgl')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--ignore-certificate-errors')
options.add_argument('--disable-software-rasterizer')
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Tencent Cloud OCR client and a single VatInvoiceOCRRequest object that
# CheckInvoice() refills for every image it sends.
cred = credential.Credential(SecretId, SecretKey)
httpProfile = HttpProfile()
httpProfile.endpoint = "ocr.tencentcloudapi.com"
clientProfile = ClientProfile()
clientProfile.httpProfile = httpProfile
client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)
req = models.VatInvoiceOCRRequest()
# Title shown on the easygui dialogs opened by the __main__ section.
Brooks = Invoice_URL + ' - 发票查验助手'
# Start Chrome and register the stealth script through the CDP command
# Page.addScriptToEvaluateOnNewDocument.
browser = webdriver.Chrome(executable_path=chrome_driver,options=options)
with open(MyStealthJS) as M:
    js = M.read()
browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": js
})
# Element lookups wait up to WinWait before failing.
browser.implicitly_wait(WinWait)
class Chaojiying_Client(object):
    """Minimal HTTP client for the Chaojiying captcha-recognition endpoints.

    Every public method sends one form-encoded POST request and returns the
    decoded JSON body of the response.
    """

    def __init__(self, username, password, soft_id):
        """Store the account data and prepare the shared request fields.

        Args:
            username: account name, sent as the ``user`` form field.
            password: plain-text password (``str``); only its MD5 hex digest
                is kept on the instance and sent, as the ``pass2`` field.
            soft_id: value sent as the ``softid`` form field.
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        # Credential fields shared by PostPic() and ReportError().
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def _post(self, url, payload, files=None):
        """POST *payload* (and optional *files*) to *url*; return the JSON reply."""
        response = requests.post(url, data=payload, files=files, headers=self.headers)
        return response.json()

    def PostPic(self, im, codetype):
        """Upload a captcha image for recognition.

        Args:
            im: image content to upload; it is sent as a file named
                ``ccc.jpg`` in the ``userfile`` field.
            codetype: captcha type code, sent as the ``codetype`` field.

        Returns:
            The decoded JSON response.
        """
        payload = {'codetype': codetype}
        payload.update(self.base_params)
        upload = {'userfile': ('ccc.jpg', im)}
        return self._post('http://upload.chaojiying.net/Upload/Processing.php', payload, files=upload)

    def GetScore(self):
        """Query the account balance; return the decoded JSON response."""
        payload = {
            'user': self.username,
            'pass2': self.password,
        }
        return self._post('http://upload.chaojiying.net/Upload/GetScore.php', payload)

    def ReportError(self, im_id):
        """Report picture *im_id* as wrongly recognised; return the JSON reply."""
        payload = {'id': im_id}
        payload.update(self.base_params)
        return self._post('http://upload.chaojiying.net/Upload/ReportError.php', payload)
def CheckPNG(PNG_FileName,PNG_FilePath):
    """Move an image into the PleaseCheck folder for manual inspection.

    The file keeps its name unless PleaseCheck already contains a file with
    that name; in that case the current Unix timestamp is prefixed so the
    existing file is not clobbered (and so ``os.rename`` does not fail on
    Windows, where renaming onto an existing file raises ``FileExistsError``).

    Args:
        PNG_FileName: bare file name of the image.
        PNG_FilePath: current full path of the image.

    The final destination path is printed.
    """
    PNG_CheckPath = os.path.join(PleaseCheck, PNG_FileName)
    # The original test was inverted: it renamed free names and tried to
    # overwrite taken ones.
    if os.path.exists(PNG_CheckPath):
        PNG_CheckPath = os.path.join(PleaseCheck, str(int(time.time())) + '-' + PNG_FileName)
    os.rename(PNG_FilePath, PNG_CheckPath)
    print(PNG_CheckPath)
def FONT2PNG():
    """Create the four 120x30 prompt banners inside the Documents folder.

    Each banner is a white RGB image carrying one line of text drawn with the
    msyh.ttc font (size 17) from the Documents folder. Files written, in this
    order: BgRed.png, BgYellow.png, BgBlue.png, BgBlack.png.
    """
    typeface = ImageFont.truetype(Documents + '\\' + 'msyh.ttc', 17)
    # (caption, text colour, output file name)
    banners = (
        (u"请输入红色文字", (255, 0, 0), 'BgRed.png'),
        (u"请输入黄色文字", (255, 215, 0), 'BgYellow.png'),
        (u"请输入蓝色文字", (0, 0, 255), 'BgBlue.png'),
        (u"请输入全部文字", (0, 0, 0), 'BgBlack.png'),
    )
    for caption, colour, filename in banners:
        canvas = Image.new('RGB', (120, 30), (255, 255, 255))
        pen = ImageDraw.Draw(canvas)
        pen.text((1, 3), text=caption, font=typeface, fill=colour, align='center')
        canvas.save(Documents + '\\' + filename)
def FontYZM(info,img):
    """Save the captcha and build the banner+captcha image sent for recognition.

    Args:
        info: prompt text shown next to the captcha on the page.
        img: ``src`` attribute of the captcha ``<img>``; the part after the
            first comma is decoded as base64 image data.

    Returns:
        ``(composite_path, raw_captcha_path)``: the stacked banner/captcha
        PNG written to xzhouSetYZM and the raw captcha PNG written to
        xzhouGetYZM. Both file names are the current Unix timestamp.

    Raises:
        ValueError: if *info* is none of the four known prompt texts.

    If the captcha data cannot be read, the page is re-read once after one
    second; if that fails too, an error dialog is shown, the browser is
    closed and the process exits via ``sys.exit()``.
    """
    if not all(os.path.exists(banner) for banner in (Red, Yellow, Blue, Black)):
        FONT2PNG()
    try:
        data = img.split(',')[1]
    except (AttributeError, IndexError):
        # src was None or had no comma-separated payload: wait briefly and
        # re-read both the prompt and the image from the page once.
        time.sleep(1)
        info = browser.find_element_by_id("yzminfo").text
        img = browser.find_element_by_id("yzm_img").get_attribute("src")
        try:
            data = img.split(',')[1]
        except (AttributeError, IndexError):
            EG.msgbox(msg='发票查询验证码获取失败!',title='错误提示', ok_button='再见')
            os.system("explorer.exe %s" % xzhouP2P)
            browser.quit()
            sys.exit()
    image_data = base64.b64decode(data)
    T = str(int(time.time()))
    fp_yzm = xzhouGetYZM + '\\' + T + '.png'
    with open(fp_yzm,'wb') as x:
        x.write(image_data)
    banner_by_prompt = {
        '请输入验证码图片中红色文字': Red,
        '请输入验证码图片中黄色文字': Yellow,
        '请输入验证码图片中蓝色文字': Blue,
        '请输入验证码文字': Black,
    }
    BG = banner_by_prompt.get(info)
    if BG is None:
        # Previously this fell through and failed later with an opaque
        # UnboundLocalError on BG.
        raise ValueError('Unrecognised captcha prompt: %r' % (info,))
    img_head = Image.open(BG)
    im_info = np.array(img_head)
    im_foot = Image.open(fp_yzm)
    im_tpsb = np.array(im_foot)
    # Stack banner above captcha; np.concatenate requires both arrays to
    # agree on every dimension except the first.
    im_fp = np.concatenate((im_info, im_tpsb), axis=0)
    img_yzm = Image.fromarray(im_fp)
    im_yzm = xzhouSetYZM + '\\' + T + '.png'
    img_yzm.save(im_yzm)
    return (im_yzm,fp_yzm)
def _read_bytes(path):
    """Return the whole binary content of *path*, closing the file afterwards."""
    with open(path, 'rb') as handle:
        return handle.read()


def _record_result(Excel, Sheet, Row, fphm, status, path, remark):
    """Write one result row (time, number, status, path, remark) and save the workbook."""
    Sheet.cell(Row,1).value = str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    Sheet.cell(Row,2).value = fphm
    Sheet.cell(Row,3).value = status
    Sheet.cell(Row,4).value = path
    Sheet.cell(Row,5).value = remark
    Excel.save(Automation)


def CheckInvoice(Excel,Sheet):
    """Verify every invoice found in the xzhouP2P folder and log the outcome.

    Steps:
      1. Each ``*pdf`` file in xzhouP2P is rendered page by page to PNG
         (2x zoom) next to it and the PDF is deleted.
      2. For each ``*png`` / ``*jpg`` file the invoice fields are taken from
         its QR code, or from the Tencent VAT-invoice OCR service when no QR
         code is decoded. Unrecognised images go to PleaseCheck (CheckPNG).
      3. Recognised invoices are entered on the verification web page, the
         captcha is solved through the module-level ``chaojiying`` client,
         and the result is written to a new row of *Sheet*.
      4. On success the result dialog is screenshotted into a per-run folder
         under MyInvoice, the invoice number is appended to fphmList and the
         source image is moved to Completed.

    Args:
        Excel: the open workbook; saved to Automation after every row.
        Sheet: worksheet receiving the rows; writing starts after
            ``Sheet.max_row``.

    The function never returns normally: it ends by opening the workbook,
    closing the browser and calling ``sys.exit()``.
    """
    print('Run Time = ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    nowTime = str(time.strftime("%Y %m %d %H %M", time.localtime())).replace(" ", "")
    MyFilePath = MyInvoice + '\\' + nowTime
    if not os.path.exists(MyFilePath):
        os.makedirs(MyFilePath)
    Row = Sheet.max_row
    zoom_x = 2.0
    zoom_y = 2.0
    rotate = int(0)
    # ---- Step 1: PDF -> PNG -------------------------------------------------
    PDF_FileNames = os.listdir(xzhouP2P)
    for PDF_FileName in PDF_FileNames:
        if PDF_FileName.endswith('pdf'):
            PDF_FilePath = os.path.join(xzhouP2P,PDF_FileName)
            doc = fitz.open(PDF_FilePath)
            for pg in range(doc.pageCount):
                page = doc[pg]
                trans = fitz.Matrix(zoom_x,zoom_y).preRotate(rotate)
                pm = page.getPixmap(matrix=trans,alpha=False)
                PDFname = PDF_FileName.split('.')[0]
                PNGname = PDFname + '_' + str(pg) + '.png'
                pm.writePNG(xzhouP2P + '\\' + PNGname)
            doc.close()
            os.remove(PDF_FilePath)
    # ---- Step 2..4: one image at a time -------------------------------------
    PNG_FileNames = os.listdir(xzhouP2P)
    for PNG_FileName in PNG_FileNames:
        if PNG_FileName.endswith("png") or PNG_FileName.endswith("jpg"):
            PNG_FilePath = os.path.join(xzhouP2P,PNG_FileName)
            print(PNG_FilePath)
            img = Image.open(PNG_FilePath)
            BarCodes = pyzbar.decode(img,symbols=[pyzbar.ZBarSymbol.QRCODE])
            if BarCodes == []:
                # No QR code: fall back to OCR. Start from empty fields so the
                # completeness test below cannot hit an unbound name or reuse
                # values left over from the previous image.
                fpdm = fphm = kprq = jym = kpje = ''
                try:
                    f = _read_bytes(PNG_FilePath)
                    img = base64.b64encode(f)
                    picbase = str(img, encoding='utf-8')
                    params = {
                        "ImageBase64": picbase
                    }
                    req.from_json_string(json.dumps(params))
                    resp = client.VatInvoiceOCR(req)
                    reinfo = json.loads(resp.to_json_string())
                    for i in reinfo["VatInvoiceInfos"]:
                        name = i['Name']
                        value = i['Value']
                        if name == '发票代码':
                            fpdm = value
                        if name == '发票号码':
                            fphm = value[-8:]
                        if name == '开票日期':
                            kprq = ''.join(re.findall(r"\d+",value))
                        if name == '校验码':
                            jym = value[-6:]
                            # Scanning stops at the check code; fields listed
                            # after it in the response are not read.
                            break
                    if fpdm != '' and fphm != '' and kprq != '' and jym != '':
                        Row = Row + 1
                        x = True
                    else:
                        x = False
                except TencentCloudSDKException as err:
                    print(err)
                    x = False
            else:
                if BarCodes[0].type == 'QRCODE':
                    BarCode = BarCodes[0].data.decode('utf-8')
                    Bardata = BarCode.split(',')
                    # Only payloads with exactly eight comma-separated fields
                    # are accepted.
                    if len(Bardata) == 8:
                        Row = Row + 1
                        fpdm_list = Bardata[2:3]
                        fpdm = ''.join(fpdm_list)
                        fphm_list = Bardata[3:4]
                        fphm = ''.join(fphm_list)
                        kprq_list = Bardata[5:6]
                        kprq = ''.join(kprq_list)
                        kpje_list = Bardata[4:5]
                        kpje = ''.join(kpje_list)
                        jym_list = Bardata[6:7]
                        jym_all = ''.join(jym_list)
                        jym = jym_all[-6:]
                        x = True
                    else:
                        x = False
                else:
                    x = False
            if x:
                # Skip invoices whose number was already verified earlier.
                with open(fphmList, "r") as f:
                    fphm_data = f.read()
                fphm_list = fphm_data.split(',')
                if fphm in fphm_list:
                    _record_result(Excel, Sheet, Row, fphm, '尚未验证', PNG_FilePath, '发票查验重复')
                    continue
                browser.get('https://inv-veri.chinatax.gov.cn/index.html')
                browser.find_element_by_id("fpdm").send_keys(fpdm)
                fpdmjy = browser.find_element_by_id("fpdmjy").text
                if fpdmjy == '发票代码有误!':
                    _record_result(Excel, Sheet, Row, fphm, '尚未查验', PNG_FilePath, '发票代码有误')
                    continue
                else:
                    browser.find_element_by_id("fphm").send_keys(fphm)
                    browser.find_element_by_id("kprq").send_keys(kprq)
                    context = browser.find_element_by_id("context").text
                    if context == '开具金额(不含税):':
                        # The page asks for the amount: this invoice type is
                        # logged as unsupported and skipped.
                        kjje = kpje
                        _record_result(Excel, Sheet, Row, fphm, '尚未查验', PNG_FilePath, '发票类型有误')
                        continue
                    if context == '校验码:':
                        kjje = jym
                    # NOTE(review): for any other label kjje is unbound (or
                    # left over from an earlier image) at this point - confirm
                    # which labels the page can show.
                    browser.find_element_by_id("kjje").send_keys(kjje)
                    time.sleep(1)
                    yzminfo = browser.find_element_by_id("yzminfo").text
                    yzm_img = browser.find_element_by_id("yzm_img").get_attribute("src")
                    getImg = FontYZM(yzminfo,yzm_img)
                    im = _read_bytes(getImg[0])
                    CJY = chaojiying.PostPic(im, 6004)
                    im_err = CJY['err_no']
                    im_id = CJY['pic_id']
                    if im_err == 0:
                        getYzm = CJY['pic_str']
                        # Counts rejected captchas. It has to live outside the
                        # loop: when it was reset on every pass the cap below
                        # could never be reached.
                        JC = 0
                        while True:
                            browser.find_element_by_id("yzm").send_keys(getYzm)
                            browser.find_element_by_id("checkfp").click()
                            try:
                                alertMsg = browser.find_element_by_id('popup_message').text
                                if alertMsg == '验证码失效!' or alertMsg == '验证码错误!':
                                    JC = JC + 1
                                    if (JC == 4):
                                        _record_result(Excel, Sheet, Row, fphm, '三次失败', PNG_FilePath, '发票查验失败')
                                        break
                                    # Dismiss the popup, request a fresh
                                    # captcha and report the wrong answer.
                                    browser.find_element_by_id("popup_ok").click()
                                    browser.find_element_by_id("yzm_img").click()
                                    browser.find_element_by_id("yzm").clear()
                                    chaojiying.ReportError(im_id)
                                    ErrorPath = ReportError + '\\' + getYzm + '-' + im_id + '.png'
                                    os.rename(getImg[0],ErrorPath)
                                    time.sleep(1)
                                    yzminfo = browser.find_element_by_id("yzminfo").text
                                    yzm_img = browser.find_element_by_id("yzm_img").get_attribute("src")
                                    getImg = FontYZM(yzminfo,yzm_img)
                                    im = _read_bytes(getImg[0])
                                    CJY = chaojiying.PostPic(im, 6004)
                                    im_err = CJY['err_no']
                                    im_id = CJY['pic_id']
                                    if im_err == 0:
                                        getYzm = CJY['pic_str']
                                    else:
                                        chaojiying.ReportError(im_id)
                                        _record_result(Excel, Sheet, Row, fphm, '验证失败', PNG_FilePath, '发票查验失败')
                                elif alertMsg == '超过该张发票当日查验次数(请于次日再次查验)!':
                                    _record_result(Excel, Sheet, Row, fphm, '次数超限', PNG_FilePath, '发票查验失败')
                                    browser.find_element_by_id("popup_ok").click()
                                    break
                                elif alertMsg == '验证码请求次数过于频繁,请1分钟后再试!':
                                    _record_result(Excel, Sheet, Row, fphm, '次数频繁', PNG_FilePath, '发票查验失败')
                                    browser.find_element_by_id("popup_ok").click()
                                    EG.msgbox(msg='验证码请求次数过于频繁!',title='错误提示',ok_button='再见')
                                    os.system("explorer.exe %s" % PNG_FilePath)
                                    browser.quit()
                                    sys.exit()
                            except Exception:
                                # `except Exception` instead of a bare except,
                                # so the SystemExit raised by sys.exit() above
                                # is no longer caught and mistaken for success.
                                # NOTE(review): every other error raised in
                                # the try body is still treated as "no popup,
                                # verification passed"; consider narrowing to
                                # selenium's NoSuchElementException.
                                InvoicePNG = MyFilePath + '\\' + fphm + '.png'
                                if os.path.exists(InvoicePNG):
                                    InvoicePNG = MyFilePath + '\\' + fphm + '-' + str(int(time.time())) + '.png'
                                # Enlarge the result dialog before taking the
                                # element screenshot.
                                width = "return document.getElementsByTagName('dialog')[0].style.width='100%'"
                                height = "return document.getElementsByTagName('dialog')[0].style.height='100%'"
                                browser.execute_script(width)
                                browser.execute_script(height)
                                browser.switch_to.frame("dialog-body")
                                dialog = browser.find_element_by_id("content")
                                time.sleep(1)
                                dialog.screenshot(InvoicePNG)
                                _record_result(Excel, Sheet, Row, fphm, '验证通过', InvoicePNG, '发票查验成功')
                                with open(fphmList,"a") as f:
                                    f.write(fphm + ',')
                                PNG_Completed = Completed + '\\' + fphm + '.png'
                                # Add a timestamp only when the plain name is
                                # already taken (the original test was
                                # inverted, like the one in CheckPNG).
                                if os.path.exists(PNG_Completed):
                                    PNG_Completed = Completed + '\\' + fphm + '-' + str(int(time.time())) + '.png'
                                os.rename(PNG_FilePath,PNG_Completed)
                                print(PNG_Completed)
                                # Keep the solved captcha pair, named after
                                # the accepted answer.
                                Set_YZM = xzhouSetYZM + '\\' + getYzm + '-' + str(int(time.time())) + '.png'
                                Get_YZM = xzhouGetYZM + '\\' + getYzm + '-' + str(int(time.time())) + '.png'
                                os.rename(getImg[0],Set_YZM)
                                os.rename(getImg[1],Get_YZM)
                                browser.find_element_by_id("closebt").click()
                                browser.switch_to.default_content()
                                break
                    else:
                        chaojiying.ReportError(im_id)
                        _record_result(Excel, Sheet, Row, fphm, '验证失败', PNG_FilePath, '发票查验失败')
            else:
                CheckPNG(PNG_FileName,PNG_FilePath)
    print('End Time = ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    Excel.save(Automation)
    os.system("explorer.exe %s" % Automation)
    browser.quit()
    sys.exit()
if __name__ == "__main__":
    # `chaojiying` has to stay a module-level name: CheckInvoice() reads it
    # as a global.
    chaojiying = Chaojiying_Client(user_name, pass_word, getyamid)
    # Remaining balance and the number of checks it covers (15 per check).
    balance = chaojiying.GetScore()['tifen']
    checks_left = int(int(balance)/15)

    if checks_left < 15:
        # Too little balance: keep showing the warning until it is
        # acknowledged, then shut down.
        warning = ''.join([
            '\n\n' + Invoice_URL +'\n\n' + '\n\n',
            '警告:当前剩余次数小于15次,发票查验助手被禁止执行。',
            '\n\n' + '\n\n',
            '剩余次数:' + str(checks_left) + ' 次 ( ' + str(balance) + ' ) 请充值!!!',
        ])
        while not EG.msgbox(msg=warning, title=Brooks, ok_button='18021275590'):
            pass
        browser.quit()
        sys.exit()

    # Usage notes shown before the run starts.
    notice = ''.join([
        '\n\n',
        '发票查验助手 - 使用注意事项 - 必读!!!' + '\n\n' + '\n\n',
        '【文件根目录】:C:\\xzhou\\Invoice (绝对路径)' + '\n\n',
        '1: 请将.PDF.PNG.JPG格式查验文件放在PDF2PNG文件内' + '\n\n',
        '2: 发票查验默认识别普票 - 其他类型过滤到PleaseCheck' + '\n\n',
        '3:发票查验成功后的截图位于根目录Invoice下的MyInvoice内' + '\n\n',
        '4:隐藏模式全自动操作,使用中请勿关闭CMD窗口。直接最小化即可!' + '\n\n',
        '5:请勿删除移动修改文件根目录下的任何配置文件,以免发生未知的错误。' + '\n\n',
        '6:发票查验助手仅限助理团队内部使用,未经同意请勿外传。版权归作者所有!' + '\n\n',
        '【当验证码查验剩余次数低于15次时 - 发票查验助手将会禁止运行】'+ '\n\n' + '\n\n',
        '剩余次数:' + str(checks_left) + ' 次 ( ' + str(balance) + ' ) 请注意!!!',
    ])
    choice = EG.buttonbox(msg=notice, title=Brooks, choices=("运行发票查验助手","关闭发票查验助手"))
    if choice == '运行发票查验助手':
        WorkBook = load_workbook(Automation)
        GetSheet = WorkBook['xzhou']
        CheckInvoice(WorkBook,GetSheet)
    else:
        # Any other outcome: show the farewell box until its button is
        # pressed, then shut down.
        while not EG.msgbox(msg='灵魂拷问:谁是世界上最帅的人???', title=Brooks, ok_button='周晓竞'):
            pass
        browser.quit()
        sys.exit()