Files
miniapp-api/src/services/ocrService.js
T

244 lines
6.6 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const axios = require('axios');
// Baidu endpoints used by this module: the OAuth token endpoint and the two
// text-recognition endpoints (general_basic and accurate_basic).
const BAIDU_OCR_API = {
token: 'https://aip.baidubce.com/oauth/2.0/token',
generalBasic: 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic',
accurateBasic: 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic'
};
// Module-level cache for the access token. `expireAt` is compared against
// Date.now() (epoch milliseconds); the initial value 0 means "nothing cached".
// Declared with `let` because getAccessToken() replaces the whole object.
let accessTokenCache = {
token: null,
expireAt: 0
};
/**
 * Returns an access token for the Baidu OCR API, reusing the module-level
 * cache when the cached token still has more than 60 seconds left.
 *
 * Credentials are read from the BAIDU_OCR_API_KEY and BAIDU_OCR_SECRET_KEY
 * environment variables and sent as query parameters of a POST to the token
 * endpoint. On success the cache is replaced with the new token and an expiry
 * computed from the time this call started plus `expires_in` seconds.
 *
 * @returns {Promise<string>} The cached or freshly issued access token.
 * @throws {Error} When either environment variable is missing, or when the
 *   token response carries no `access_token`. Errors raised by the HTTP
 *   client propagate unchanged.
 */
async function getAccessToken() {
  const requestedAt = Date.now();

  // Serve from cache only while at least 60 s of validity remain.
  const cachedToken = accessTokenCache.token;
  if (cachedToken && accessTokenCache.expireAt > requestedAt + 60000) {
    return cachedToken;
  }

  const apiKey = process.env.BAIDU_OCR_API_KEY;
  const secretKey = process.env.BAIDU_OCR_SECRET_KEY;
  const hasCredentials = Boolean(apiKey) && Boolean(secretKey);
  if (!hasCredentials) {
    throw new Error('百度OCR配置缺失:请检查 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY');
  }

  const requestConfig = {
    params: {
      grant_type: 'client_credentials',
      client_id: apiKey,
      client_secret: secretKey
    },
    headers: {
      'Content-Type': 'application/json',
      'Accept': 'application/json'
    }
  };
  // The request carries no body; everything travels in the query string.
  const response = await axios.post(BAIDU_OCR_API.token, null, requestConfig);

  const { access_token: issuedToken, expires_in: lifetimeSeconds } = response.data;
  if (!issuedToken) {
    throw new Error(`获取百度OCR Token失败: ${JSON.stringify(response.data)}`);
  }

  // Expiry is anchored to the pre-request timestamp, so it errs on the early side.
  accessTokenCache = {
    token: issuedToken,
    expireAt: requestedAt + (lifetimeSeconds * 1000)
  };
  return issuedToken;
}
/**
 * Posts one base64-encoded image to a Baidu OCR endpoint and returns the
 * response body. Shared by recognizeText() and recognizeTextAccurate(), which
 * differ only in the endpoint they target.
 *
 * @param {string} endpoint - OCR endpoint URL (without query string).
 * @param {string} imageBase64 - Base64-encoded image data, sent as the `image` form field.
 * @param {object} [options] - Optional settings; only `language_type` is read.
 *   `null`/`undefined` are treated as "no options".
 * @returns {Promise<*>} The `data` of the HTTP response, returned as-is.
 */
async function requestBaiduOcr(endpoint, imageBase64, options) {
  const accessToken = await getAccessToken();
  const url = `${endpoint}?access_token=${accessToken}`;

  // URLSearchParams takes care of form-encoding the base64 payload ('+', '/', '=').
  const params = new URLSearchParams();
  params.append('image', imageBase64);
  // Guard against an explicit `null`, which a default parameter does not cover.
  const languageType = options && options.language_type;
  if (languageType) {
    params.append('language_type', languageType);
  }

  const response = await axios.post(url, params.toString(), {
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded'
    }
  });
  return response.data;
}

/**
 * Runs text recognition on an image using the `general_basic` endpoint.
 *
 * @param {string} imageBase64 - Base64-encoded image data.
 * @param {object} [options={}] - Optional settings.
 * @param {string} [options.language_type] - Forwarded as the `language_type` form field when set.
 * @returns {Promise<*>} The response body returned by the OCR endpoint.
 */
async function recognizeText(imageBase64, options = {}) {
  return requestBaiduOcr(BAIDU_OCR_API.generalBasic, imageBase64, options);
}

/**
 * Runs text recognition on an image using the `accurate_basic` endpoint.
 *
 * @param {string} imageBase64 - Base64-encoded image data.
 * @param {object} [options={}] - Optional settings.
 * @param {string} [options.language_type] - Forwarded as the `language_type` form field when set.
 * @returns {Promise<*>} The response body returned by the OCR endpoint.
 */
async function recognizeTextAccurate(imageBase64, options = {}) {
  return requestBaiduOcr(BAIDU_OCR_API.accurateBasic, imageBase64, options);
}
/**
 * Returns the default price (as a string) for a membership type.
 *
 * Fixed-period types map to a fixed price; the `times` type is priced at 10
 * per use. Unknown types fall back to '10'.
 *
 * @param {string} type - One of 'year', 'halfYear', 'quarter', 'month' or 'times'.
 * @param {string|number|null} [times] - Number of uses; only read when `type`
 *   is 'times'. Missing, non-numeric or non-positive values count as 1.
 * @returns {string} The default price.
 */
function getDefaultPrice(type, times) {
  if (type === 'times') {
    // Explicit radix: never let a '0x…' string be parsed as hexadecimal.
    const parsed = Number.parseInt(times, 10);
    // NaN, 0 and negative counts all collapse to a single use so the price is never <= 0.
    const timesCount = parsed > 0 ? parsed : 1;
    return String(timesCount * 10);
  }

  const prices = {
    year: '120',
    halfYear: '60',
    quarter: '30',
    month: '10'
  };
  // Own-property check: a plain `prices[type]` would return inherited members
  // (e.g. the `constructor` function) for types such as 'constructor' or 'toString'.
  return Object.prototype.hasOwnProperty.call(prices, type) ? prices[type] : '10';
}
// Platform name -> keywords whose presence (case-insensitive) identifies it.
// Checked in insertion order; the first platform with a matching keyword wins.
const PLATFORM_KEYWORDS = {
  '淘宝': ['淘宝', 'taobao', '88vip', '88VIP'],
  '京东': ['京东', 'jd', 'JD', 'plus', 'PLUS'],
  '拼多多': ['拼多多', 'pdd', 'PDD'],
  '美团': ['美团', 'meituan'],
  '饿了么': ['饿了么', 'eleme', 'ele.me'],
  '抖音': ['抖音', 'douyin', 'tiktok'],
  '快手': ['快手', 'kuaishou'],
  '网易云音乐': ['网易云', 'netease', '163'],
  'QQ音乐': ['QQ音乐', 'qq音乐'],
  '优酷': ['优酷', 'youku'],
  '爱奇艺': ['爱奇艺', 'iqiyi'],
  '腾讯视频': ['腾讯视频', 'v.qq'],
  '哔哩哔哩': ['哔哩哔哩', 'bilibili', 'B站'],
  '喜马拉雅': ['喜马拉雅', 'ximalaya'],
  '知乎': ['知乎', 'zhihu'],
  '百度网盘': ['百度网盘', '百度云']
};

// Membership-type rules, checked in order; the first rule with a matching
// pattern wins. The guards exist because the unguarded forms misfire:
//  - `[^半]` keeps "半年卡" (half-year) from being read as "年卡" (year);
//  - `(?:^|\D)` keeps "1个月" from matching inside "11个月", and keeps the
//    "N年" rule from matching the tail of a 4-digit year such as "2024年";
//  - `(?![ \t]*\d)` keeps "24年12月" (a 2-digit-year date) from counting as "24 years".
const MEMBERSHIP_TYPE_RULES = [
  {
    type: 'year',
    patterns: [/(?:^|[^半])年卡/, /(?:^|[^半])年度会员/, /(?:^|\D)12个月/, /(?:^|\D)\d{1,2}年(?![ \t]*\d)/]
  },
  { type: 'halfYear', patterns: [/半年卡/, /半年度?会员/, /(?:^|\D)6个月/] },
  { type: 'quarter', patterns: [/季卡/, /季度会员/, /(?:^|\D)3个月/] },
  { type: 'month', patterns: [/月卡/, /月度会员/, /(?:^|\D)1个月/] },
  { type: 'times', patterns: [/次卡/, /按次数/] }
];

// Date patterns in priority order; each captures (year, month, day).
// Dates labelled as an expiry come first so that an unrelated earlier date
// (e.g. the purchase date) does not win. \uFF1A is the full-width colon.
const EXPIRE_DATE_PATTERNS = [
  /有效期[至到]?[:\uFF1A]?\s*(\d{4})[年/.-](\d{1,2})[月/.-](\d{1,2})/,
  /到期(?:时间|日期|日)?[:\uFF1A]?\s*(\d{4})[年/.-](\d{1,2})[月/.-](\d{1,2})/,
  /(\d{4})[年/-](\d{1,2})[月/-](\d{1,2})/,
  // Compact YYYYMMDD: must be exactly 8 digits, not a slice of a longer number.
  /(?:^|\D)(\d{4})(\d{2})(\d{2})(?!\d)/,
  // 2-digit year, expanded to 20YY.
  /(?:^|\D)(\d{2})[年/-](\d{1,2})[月/-](\d{1,2})/,
  /(\d{4})\.(\d{1,2})\.(\d{1,2})/
];

// Sentinel used when no expiry date can be found in the text.
const NO_EXPIRE_DATE = '9999-12-31';

// Benefit names looked for line by line.
const BENEFIT_KEYWORDS = [
  '优酷', '网易云', 'QQ音乐', '酷狗', '酷我',
  '爱奇艺', '腾讯视频', '芒果TV', '哔哩哔哩',
  '饿了么', '美团', '高德打车', '滴滴',
  '夸克', '百度网盘', '迅雷',
  '喜马拉雅', '知乎', '微博',
  '淘票票', '飞猪', '希尔顿', '万豪',
  '视频会员', '超级吃货卡', '天猫超市', '天猫国际',
  '阿里健康', '专属客服', '省钱卡', '网盘会员',
  '打车会员', '金卡', '皮肤装扮', '每日领券',
  '出行礼遇', '专享立减', '游戏特权'
];

/** Returns the first platform whose keyword occurs in `text` (case-insensitive), or null. */
function detectPlatform(text) {
  const haystack = text.toLowerCase();
  for (const [platformName, keywords] of Object.entries(PLATFORM_KEYWORDS)) {
    if (keywords.some((keyword) => haystack.includes(keyword.toLowerCase()))) {
      return platformName;
    }
  }
  return null;
}

/** Returns the type of the first matching rule in MEMBERSHIP_TYPE_RULES, or 'month' when none match. */
function detectMembershipType(text) {
  for (const { type, patterns } of MEMBERSHIP_TYPE_RULES) {
    if (patterns.some((pattern) => pattern.test(text))) {
      return type;
    }
  }
  return 'month';
}

/**
 * Returns the first plausible date in `text` as 'YYYY-MM-DD', trying
 * EXPIRE_DATE_PATTERNS in priority order, or NO_EXPIRE_DATE when none is found.
 * Matches whose month is outside 1-12 or day outside 1-31 are skipped.
 */
function detectExpireDate(text) {
  for (const pattern of EXPIRE_DATE_PATTERNS) {
    // Fresh global copy per call so lastIndex state never leaks between calls.
    const scanner = new RegExp(pattern.source, 'g');
    let match = scanner.exec(text);
    while (match !== null) {
      const [, rawYear, rawMonth, rawDay] = match;
      const month = Number(rawMonth);
      const day = Number(rawDay);
      if (month >= 1 && month <= 12 && day >= 1 && day <= 31) {
        const year = rawYear.length === 2 ? `20${rawYear}` : rawYear;
        return `${year}-${rawMonth.padStart(2, '0')}-${rawDay.padStart(2, '0')}`;
      }
      match = scanner.exec(text);
    }
  }
  return NO_EXPIRE_DATE;
}

/**
 * Extracts membership benefit entries from an OCR result.
 *
 * The recognised lines are scanned for a platform name, a membership type, an
 * expiry date and known benefit keywords. One entry is produced per distinct
 * benefit keyword found (in order of first appearance); if no benefit keyword
 * is found but a platform is, a single entry named after the platform is
 * returned instead.
 *
 * @param {{words_result?: Array<{words?: string}>}} ocrResult - OCR response
 *   body; entries without a string `words` field are ignored.
 * @returns {Array<{name: string, type: string, times: null, price: string, expireDate: string}>}
 *   Extracted entries; empty when there is no usable text or nothing is recognised.
 *   `times` is always null. `expireDate` is '9999-12-31' when no date is found.
 */
function extractMembershipInfo(ocrResult) {
  const wordsResult = ocrResult && ocrResult.words_result;
  if (!Array.isArray(wordsResult) || wordsResult.length === 0) {
    return [];
  }

  // Drop malformed entries so string methods below cannot throw.
  const lines = wordsResult
    .map((entry) => entry && entry.words)
    .filter((words) => typeof words === 'string');
  const text = lines.join('\n');

  const platform = detectPlatform(text);
  const detectedType = detectMembershipType(text);
  const expireDate = detectExpireDate(text);

  const buildEntry = (name) => ({
    name,
    type: detectedType,
    times: null,
    price: getDefaultPrice(detectedType, null),
    expireDate
  });

  const benefits = [];
  const seenNames = new Set();
  for (const line of lines) {
    for (const keyword of BENEFIT_KEYWORDS) {
      if (line.includes(keyword) && !seenNames.has(keyword)) {
        seenNames.add(keyword);
        benefits.push(buildEntry(keyword));
      }
    }
  }

  if (benefits.length === 0 && platform) {
    benefits.push(buildEntry(platform));
  }
  return benefits;
}
// Public API of this module. getDefaultPrice is intentionally not exported;
// it is only used internally by extractMembershipInfo.
module.exports = {
getAccessToken,
recognizeText,
recognizeTextAccurate,
extractMembershipInfo
};