// 3v324v23's picture
// feat: Initial commit for Hugging Face Spaces
// 1b21241
const fs = require('fs');
const path = require('path');
const { XMLParser, XMLBuilder } = require('fast-xml-parser');
const StreamArray = require('stream-json/streamers/StreamArray');
const { chain } = require('stream-chain');
const { parser } = require('stream-json');
const { pick } = require('stream-json/filters/Pick');
const { ignore } = require('stream-json/filters/Ignore');
const performanceMonitor = require('./performance');
/**
* XML与JSON处理模块
* 演示:
* 1. 使用 fast-xml-parser 解析和生成XML
* 2. 使用 stream-json 流式处理大型JSON文件
*/
class XmlJsonHandler {
constructor() {
this.xmlParser = new XMLParser();
this.xmlBuilder = new XMLBuilder();
}
/**
* 生成测试用的 XML 数据
*/
generateXml(itemCount = 1000) {
performanceMonitor.start('生成XML');
const items = [];
for (let i = 0; i < itemCount; i++) {
items.push({
id: i,
name: `Item_${i}`,
price: Math.random() * 100,
description: `Description for item ${i}`
});
}
const xmlObj = {
root: {
metadata: { timestamp: Date.now() },
items: { item: items }
}
};
const xmlContent = this.xmlBuilder.build(xmlObj);
performanceMonitor.end('生成XML');
return xmlContent;
}
/**
* 解析 XML 数据
*/
parseXml(xmlContent) {
performanceMonitor.start('解析XML');
const result = this.xmlParser.parse(xmlContent);
// 简单处理:统计 items 数量
const count = result.root.items.item.length;
console.log(`[XML处理] 解析完成,包含 ${count} 个条目`);
performanceMonitor.end('解析XML');
return result;
}
/**
* 生成测试用的大型 JSON 文件
*/
async generateLargeJson(filePath, itemCount = 50000) {
performanceMonitor.start('生成大JSON文件');
return new Promise((resolve, reject) => {
const stream = fs.createWriteStream(filePath);
stream.write('[\n'); // 开始 JSON 数组
let i = 0;
const writeNext = () => {
let ok = true;
do {
i++;
const item = {
id: i,
data: `Data_${i}`,
timestamp: Date.now(),
nested: { info: `Nested info ${i}` }
};
const isLast = i === itemCount;
const content = JSON.stringify(item) + (isLast ? '' : ',\n');
if (isLast) {
stream.write(content);
stream.write('\n]'); // 结束 JSON 数组
stream.end();
} else {
ok = stream.write(content);
}
} while (i < itemCount && ok);
if (i < itemCount) {
stream.once('drain', writeNext);
}
};
writeNext();
stream.on('finish', () => {
performanceMonitor.end('生成大JSON文件');
console.log(`[JSON处理] 已生成大JSON文件: ${filePath}`);
resolve();
});
stream.on('error', reject);
});
}
/**
* 流式解析大型 JSON 文件
* 使用 stream-json 避免将整个大数组加载进内存
*/
async processLargeJsonStream(filePath) {
performanceMonitor.start('流式处理大JSON');
return new Promise((resolve, reject) => {
let counter = 0;
// 构建流处理链:读取 -> 解析 -> 提取数组元素 -> 处理每个对象
const pipeline = chain([
fs.createReadStream(filePath),
parser(),
StreamArray.streamArray(), // 假设顶层是一个数组
]);
pipeline.on('data', (data) => {
// data 结构: { key: number, value: object }
counter++;
// 模拟处理逻辑,比如每处理 10000 条打印一次进度
if (counter % 10000 === 0) {
// console.log(`[JSON流] 已处理 ${counter} 条记录...`);
}
});
pipeline.on('end', () => {
console.log(`[JSON流] 处理完成,共计 ${counter} 条记录`);
performanceMonitor.end('流式处理大JSON');
resolve();
});
pipeline.on('error', (err) => {
console.error('[JSON流] 处理出错:', err);
reject(err);
});
});
}
}
// Export a single XmlJsonHandler instance, constructed when this module is
// evaluated, rather than the class itself.
module.exports = new XmlJsonHandler();