#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import re
import sys
import time
import fitz
import xlwt
import requests
import easygui as EG
import pyzbar.pyzbar as pyzbar
from bs4 import BeautifulSoup
from PIL import Image
from aip import AipOcr

# Working directories of the tool. Everything lives under one fixed root;
# the sub-paths are derived from it so the root is spelled out only once.
Invoice = r'C:\xzhou\InvoiceOCR'
# Inbox: PDFs and images waiting for barcode recognition (see PDF2PNG2OCR).
xzhouP2P = Invoice + r'\PDF2PNG'
# Images without a readable barcode, queued for the OCR service (see CheckPNG).
BaiDuOCR = Invoice + r'\BaiDuOCR'
# Images that were recognised and renamed to their invoice number.
Completed = Invoice + r'\Completed'
# Images that need a human to type the number (see ManualPNG / ManualOCR).
InvoiceManual = Invoice + r'\Manual'
# Scratch/output directory: cropped previews and the generated workbook.
Documents = Invoice + r'\Documents'
# Workbook written by GenerateExcel.
ExcelPath = Documents + r'\Invoice.xlsx'

# Remote configuration bootstrap. This runs at import time: a page is
# downloaded, the text of its first <code> element is extracted, and that text
# is evaluated into a dict holding the OCR credentials and a URL string.
# NOTE(review): 'xzhou.html' has no URL scheme, which requests normally
# rejects - this looks like a redacted placeholder; confirm the real address.
res = requests.get(r'xzhou.html').content
soup = BeautifulSoup(res, 'html.parser')
xzhou = soup.code.text.strip()
# SECURITY: eval() executes whatever text the remote page returns. If the
# payload is a plain dict literal, ast.literal_eval() would parse it without
# executing code - TODO confirm the payload shape and switch.
xzhou_options = eval(xzhou)
# Credentials handed to AipOcr(...) in BaiDuAip.
APP_ID = xzhou_options['options_APPID']
API_KEY = xzhou_options['options_APIKEY']
SECRET_KEY = xzhou_options['options_SECRETKEY']
Invoice_URL = xzhou_options['options_InvoiceURL']

# Window title shared by the easygui dialogs in this file.
Brooks = Invoice_URL + ' - 发票确认函识别助手'

def Validate(OldText):
    """Return *OldText* with each of the characters / \\ : * ? " < > | replaced by ``_``.

    The result is used as a file name by the callers in this file.
    """
    forbidden = re.compile(r'[\/\\\:\*\?\"\<\>\|]')
    return forbidden.sub("_", OldText)

def get_file_content(FilePath):
    """Read the file at *FilePath* in binary mode and return its bytes."""
    with open(FilePath, mode='rb') as stream:
        payload = stream.read()
    return payload

def CheckPNG(FileName,FilePath):
    """Move the image at *FilePath* into the ``BaiDuOCR`` directory.

    The file keeps the name *FileName*; if that name is already taken in the
    target directory, the current Unix timestamp and a dash are prepended.
    The final location is printed.
    """
    target = BaiDuOCR + '\\' + FileName
    if os.path.exists(target):
        # Name clash: prefix with the current time instead of overwriting.
        stamp = str(int(time.time()))
        target = BaiDuOCR + '\\' + stamp + '-' + FileName
    os.rename(FilePath, target)
    print('等待接口识别:[' + target + ']')

def ManualPNG(FileName,FilePath):
    """Move the image at *FilePath* into the ``InvoiceManual`` directory.

    The file keeps the name *FileName*; if that name is already taken in the
    target directory, the current Unix timestamp and a dash are prepended.
    The final location is printed.
    """
    target = InvoiceManual + '\\' + FileName
    if os.path.exists(target):
        # Name clash: prefix with the current time instead of overwriting.
        stamp = str(int(time.time()))
        target = InvoiceManual + '\\' + stamp + '-' + FileName
    os.rename(FilePath, target)
    print('等待人工识别:[' + target + ']')

def GENEWIZ(OCR,FileName,FilePath):
    """Pick an invoice number out of OCR results and file the image accordingly.

    *OCR* is a sequence of mappings, each read through its ``'words'`` entry.
    The first entry that either contains ``*`` (its text minus the last
    character is taken) or consists of exactly eight digits decides the
    number. With a number, the image at *FilePath* is moved to
    ``Completed/<number>.png`` (a ``_<timestamp>`` suffix is added when that
    name exists); without one, it is handed to ``ManualPNG``.
    """
    Number = ''
    for entry in OCR:
        text = entry.get('words')
        if '*' in text:
            # Drop the final character of the matched text.
            Number = text[:-1]
            break
        if text.isdigit() and len(text) == 8:
            Number = text
            break

    if Number == '':
        # Nothing usable was recognised: leave it for a human.
        ManualPNG(FileName,FilePath)
        return

    xzhouNumber = Validate(Number)
    target = Completed + '\\' + xzhouNumber + '.png'
    if os.path.exists(target):
        target = Completed + '\\' + xzhouNumber + '_' + str(int(time.time())) + '.png'
    os.rename(FilePath, target)
    print('接口识别成功:[' + target + ']')

def InvoiceCrop(FileName,FilePath):
    """Save a cropped preview of an image as a GIF and return the GIF's path.

    The image is treated as a grid of 3 columns by 4 rows; the cell in the
    right-hand column, second row from the top, is cut out and written to
    ``Documents/<name>.gif``, where ``<name>`` is *FileName* up to its first
    dot.

    Args:
        FileName: Name of the source image, used to derive the GIF name.
        FilePath: Path of the source image to open.

    Returns:
        The path of the GIF that was written.
    """
    GifName = FileName.split('.')[0]
    GifPath = Documents + '\\' + GifName + '.gif'
    # Open through a context manager so the source file handle is released
    # before returning; the caller renames the source file afterwards.
    with Image.open(FilePath) as img:
        width, height = img.size
        cell_w = width // 3
        cell_h = height // 4
        box = (cell_w * 2, cell_h, cell_w * 3, cell_h * 2)
        img.crop(box).save(GifPath)
    return GifPath

def PDF2PNG2OCR():
    """Render PDFs to PNG, then file every image by its barcode.

    1. Each entry of ``xzhouP2P`` whose name ends with ``pdf`` is rendered
       page by page (2x zoom, no rotation) to ``<name>_<page>.png`` in the
       same directory, and the PDF is then deleted.
    2. Each ``png``/``jpg`` in ``xzhouP2P`` is scanned with pyzbar. When a
       barcode is found, its text (cut at the last ``*`` if one is present)
       becomes the file name in ``Completed``; otherwise the image is passed
       to ``CheckPNG``.
    3. The user is asked whether to continue with ``BaiDuAip``; declining
       opens the ``BaiDuOCR`` folder and exits the process.
    """
    zoom_x = 2.0
    zoom_y = 2.0
    rotate = 0
    # NOTE(review): pageCount / preRotate / getPixmap / writePNG look like the
    # legacy camelCase PyMuPDF names - confirm they exist in the installed
    # version before upgrading the library.
    for PDF_FileName in os.listdir(xzhouP2P):
        if not PDF_FileName.endswith('pdf'):
            continue
        PDF_FilePath = os.path.join(xzhouP2P, PDF_FileName)
        # splitext only strips the extension, so "a.b.pdf" and "a.c.pdf" no
        # longer collapse onto the same "a_<page>.png" output.
        PDFname = os.path.splitext(PDF_FileName)[0]
        doc = fitz.open(PDF_FilePath)
        try:
            # The matrix does not depend on the page; build it once per file.
            trans = fitz.Matrix(zoom_x, zoom_y).preRotate(rotate)
            for pg in range(doc.pageCount):
                pm = doc[pg].getPixmap(matrix=trans, alpha=False)
                PNGname = PDFname + '_' + str(pg) + '.png'
                pm.writePNG(xzhouP2P + '\\' + PNGname)
        finally:
            # Always release the document, even if rendering a page failed.
            doc.close()
        os.remove(PDF_FilePath)

    for PNG_FileName in os.listdir(xzhouP2P):
        if not PNG_FileName.endswith(("png", "jpg")):
            continue
        PNG_FilePath = os.path.join(xzhouP2P, PNG_FileName)
        # Release the image's file handle before the file is renamed below.
        with Image.open(PNG_FilePath) as img:
            BarCodes = pyzbar.decode(img)
        if not BarCodes:
            CheckPNG(PNG_FileName, PNG_FilePath)
            continue

        # Only the first detected barcode is used.
        BarCodeData = BarCodes[0].data.decode('utf-8')
        if '*' in BarCodeData:
            Number = BarCodeData[0:BarCodeData.rfind('*')]
        else:
            Number = BarCodeData
        xzhouNumber = Validate(Number)
        if not xzhouNumber:
            # An empty number would yield a file literally named ".png";
            # queue the image for OCR like an image without a barcode.
            CheckPNG(PNG_FileName, PNG_FilePath)
            continue

        PNG_Completed = Completed + '\\' + xzhouNumber + '.png'
        if os.path.exists(PNG_Completed):
            # Keep both files: add a timestamp to the newcomer.
            PNG_Completed = Completed + '\\' + xzhouNumber + '_' + str(int(time.time())) + '.png'
        os.rename(PNG_FilePath, PNG_Completed)
        print('条码识别成功:[' + PNG_Completed + ']')

    OK = EG.buttonbox(msg='发票确认函条码识别已完成!是否调用接口识别???', title=Brooks, choices=("调用接口识别","关闭接口识别"))
    if OK == "调用接口识别":
        BaiDuAip()
    else:
        EG.msgbox(msg='关闭发票确认函识别助手!', title='关闭提示', ok_button='再见')
        os.system("explorer.exe %s" % BaiDuOCR)
        sys.exit()

def BaiDuAip():
    """Send every queued image to the OCR service and file it by the result.

    For each ``png``/``jpg`` in ``BaiDuOCR`` the image bytes are posted with
    ``detect_direction`` enabled.

    * If the response carries ``error_code``, the error is printed once a
      second for nine seconds and the remaining files are left untouched.
    * If no text was returned, the image goes to ``ManualPNG``.
    * Otherwise the image is rotated in place when ``direction`` is 1, 2 or 3,
      and is then handed to ``GENEWIZ`` together with the recognised text.

    Afterwards the user is asked whether to continue with ``ManualOCR``;
    declining opens the ``InvoiceManual`` folder and exits the process.
    """
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    options = {"detect_direction": "true"}
    # direction value -> (PIL transpose method, label printed in the log).
    # NOTE(review): the mapping is kept exactly as before. Image.ROTATE_90
    # turns the picture counter-clockwise; confirm against the OCR service's
    # definition of `direction` that this is the correcting rotation.
    rotations = {
        1: (Image.ROTATE_90, '90'),
        2: (Image.ROTATE_180, '180'),
        3: (Image.ROTATE_270, '270'),
    }
    for PNG_FileName in os.listdir(BaiDuOCR):
        if not PNG_FileName.endswith(("png", "jpg")):
            continue
        PNG_FilePath = os.path.join(BaiDuOCR, PNG_FileName)
        msg = client.basicGeneral(get_file_content(PNG_FilePath), options)

        if 'error_code' in msg:
            # str() guards against a numeric error code, which previously
            # raised inside the try block and was swallowed by a bare except.
            error_text = str(msg['error_code']) + ' : ' + str(msg.get('error_msg'))
            for i in range(9):
                print(error_text + ' - ' + str(9-i) + 's')
                time.sleep(1)
            break

        words_result = msg.get('words_result')
        direction = msg.get('direction')
        if not words_result:
            # Missing or empty result: nothing to parse, leave it for a human.
            ManualPNG(PNG_FileName, PNG_FilePath)
            continue

        # Rotate BEFORE GENEWIZ: GENEWIZ moves the file away, so the path is
        # no longer valid once it returns.
        if direction in rotations:
            method, degrees = rotations[direction]
            with Image.open(PNG_FilePath) as img:
                rotated = img.transpose(method)
            rotated.save(PNG_FilePath)
            print('图片旋转完成:[' + PNG_FilePath + '](' + degrees + ')')
        GENEWIZ(words_result, PNG_FileName, PNG_FilePath)

    OK = EG.buttonbox(msg='发票确认函接口识别已完成!是否开始人工识别???', title=Brooks, choices=("开始人工识别","关闭人工识别"))
    if OK == "开始人工识别":
        ManualOCR()
    else:
        EG.msgbox(msg='关闭发票确认函识别助手!', title='关闭提示', ok_button='再见')
        os.system("explorer.exe %s" % InvoiceManual)
        sys.exit()

def ManualOCR():
    """Ask the user to type the invoice number for every image in ``InvoiceManual``.

    For each ``png``/``jpg`` a cropped preview is produced with
    ``InvoiceCrop`` and shown in an input dialog. The entry must be exactly
    eight characters long; it is then sanitised with ``Validate`` and the
    image is moved to ``Completed/<number>.png`` (with a ``_<timestamp>``
    suffix if that name exists). Cancelling the dialog offers to quit, which
    removes the preview, opens the ``InvoiceManual`` folder and exits.

    When all images are handled the user may continue with ``GenerateExcel``;
    declining opens the ``InvoiceManual`` folder and exits the process.
    """
    for PNG_FileName in os.listdir(InvoiceManual):
        if not PNG_FileName.endswith(("png", "jpg")):
            continue
        PNG_FilePath = os.path.join(InvoiceManual, PNG_FileName)
        img = InvoiceCrop(PNG_FileName, PNG_FilePath)
        while True:
            entered = EG.enterbox(msg='请输入发票确认函的发票号码', title=Brooks, strip=True, image=img)
            if entered is None:
                # Dialog was cancelled: confirm before quitting.
                GO = EG.buttonbox(msg='请确认是否关闭人工识别?', title='关闭提示', choices=("继续运行","关闭运行"))
                if GO == '继续运行':
                    continue
                os.remove(img)
                os.system("explorer.exe %s" % InvoiceManual)
                sys.exit()
            if len(entered) != 8:
                EG.msgbox(msg='发票号码输入错误请重试!', title='错误提示', ok_button='继续')
                continue

            # Sanitise like the barcode and OCR paths do, so a typed
            # character such as '*' or '/' cannot break the rename below.
            xzhouNumber = Validate(entered)
            PNG_Completed = Completed + '\\' + xzhouNumber + '.png'
            if os.path.exists(PNG_Completed):
                PNG_Completed = Completed + '\\' + xzhouNumber + '_' + str(int(time.time())) + '.png'
            os.rename(PNG_FilePath, PNG_Completed)
            os.remove(img)
            print('人工识别成功:[' + PNG_Completed + ']')
            break

    OK = EG.buttonbox(msg='发票确认函人工识别已完成!是否生成表格数据???', title=Brooks, choices=("生成表格数据","关闭识别助手"))
    if OK == "生成表格数据":
        GenerateExcel()
    else:
        EG.msgbox(msg='关闭发票确认函识别助手!', title='关闭提示', ok_button='再见')
        os.system("explorer.exe %s" % InvoiceManual)
        sys.exit()

def GenerateExcel():
    """Write the invoice numbers found in a chosen directory to a workbook.

    The user picks a directory (the dialog starts at ``Completed``). A sheet
    named after the current time (``YYYYmmddHHMM``) gets the directory path in
    its first row, followed by one row per ``png``/``jpg`` whose name without
    extension is all digits. The workbook is saved to ``ExcelPath``, opened
    through Explorer, and the process exits. Cancelling the dialog, or a
    selection that is not a directory, also exits.
    """
    # A directory chooser is required here: the previous file chooser returned
    # a file path, so the isdir() check below could never succeed.
    FileDir = EG.diropenbox(msg='请先选择生成数据的文件目录', title=Brooks, default=Completed)
    if FileDir is None:
        sys.exit()
    if not os.path.isdir(FileDir):
        EG.msgbox(msg='选择的文件目录路径错误!', title='关闭提示', ok_button='再见')
        sys.exit()

    nowTime = time.strftime("%Y%m%d%H%M", time.localtime())
    wb = xlwt.Workbook()
    ws = wb.add_sheet(nowTime)
    ws.write(0, 0, FileDir)
    row = 1
    for FileName in os.listdir(FileDir):
        if not FileName.endswith(('png', 'jpg')):
            continue
        InvoiceNumber = FileName[0:FileName.rfind('.')]
        # Names carrying a "_<timestamp>" suffix or other text are skipped.
        if InvoiceNumber.isdigit():
            ws.write(row, 0, InvoiceNumber)
            row += 1
    # NOTE(review): xlwt produces the legacy .xls format while ExcelPath ends
    # in ".xlsx" - confirm the target application accepts that combination.
    wb.save(ExcelPath)
    os.system('start explorer %s' % ExcelPath)
    sys.exit()

if __name__ == '__main__':
    # Start-up notice shown in the first dialog, assembled from its pieces in
    # display order; each piece carries its own trailing blank lines.
    notice_parts = (
        '\n\n',
        '发票确认函识别助手 - 注意事项 - 必读!!!' + '\n\n' + '\n\n',
        '【根目录】:C:\\xzhou\\InvoiceOCR (绝对路径)' + '\n\n',
        '1: 请将.PDF.PNG.JPG格式文件放在PDF2PNG文件目录内' + '\n\n',
        '2: 扫描确认函请选择灰色模式 - 未识别过滤到PleaseCheck' + '\n\n',
        '3:发票确认函识别成功后会移动至根目录Invoice下Completed内' + '\n\n',
        '3:发票确认函进行手动识别前,请检查目录Manual文件内的图片方向。' + '\n\n',
        '4:接口识别对接百度开放平台有次数限制,识别失败移动至Manual文件内。' + '\n\n',
        '5:请勿删除移动修改文件根目录下的任何文件路径,使用中请勿关闭CMD窗口。' + '\n\n',
        '6:发票确认函识别助手仅限团队内部使用,未经同意请勿外传。版权归作者所有!' + '\n\n' + '\n\n',
        '运行识别助手建议顺序:【条码识别】 - 【接口识别】 - 【人工识别】',
    )
    MK = ''.join(notice_parts)
    OK = EG.buttonbox(msg=MK, title=Brooks, choices=("运行条码识别","运行接口识别","运行人工识别","关闭识别助手"))
    if OK == "运行条码识别":
        PDF2PNG2OCR()
    elif OK == "运行接口识别":
        BaiDuAip()
    elif OK == "运行人工识别":
        ManualOCR()
    else:
        # Any other outcome: keep showing the farewell box until its button
        # is pressed (a falsy result re-opens it), then quit.
        while not EG.msgbox(msg='灵魂拷问:谁是世界上最帅的人???', title=Brooks, ok_button='周晓竞'):
            pass
        sys.exit()
    
# COURIER ● 豫ICP备2020027789号 ● XZHOU