| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
/**
 * Author-macro markers and the 1-based affiliation index each one maps to.
 *
 * NOTE(review): `\ensps` and `\oxford` both map to 1 and `\hf` maps to 2,
 * while the default affiliation list below has three entries. The mapping is
 * kept as it was; confirm the intended indices against the LaTeX template.
 */
const LATEX_AFFILIATION_MARKERS = [
  [/\\ensps(?![a-zA-Z])/, 1],
  [/\\oxford(?![a-zA-Z])/, 1],
  [/\\hf(?![a-zA-Z])/, 2],
];

/**
 * Return the text inside the brace group that opens at `openIndex`.
 *
 * Nested groups are kept intact and backslash-escaped characters (such as
 * `\{` and `\}`) are not counted as delimiters.
 *
 * @param {string} text - Full LaTeX source.
 * @param {number} openIndex - Index of the opening `{`.
 * @returns {string|null} Group content, or `null` when the group never closes.
 */
function readLatexBraceGroup(text, openIndex) {
  let depth = 0;
  for (let i = openIndex; i < text.length; i += 1) {
    const ch = text[i];
    if (ch === '\\') {
      // Skip the escaped character so `\{` / `\}` do not affect the depth.
      i += 1;
    } else if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) {
        return text.slice(openIndex + 1, i);
      }
    }
  }
  return null;
}

/**
 * Read the brace argument whose opening `{` is the last character of `match`.
 *
 * @param {string} text - Full LaTeX source.
 * @param {RegExpMatchArray} match - Match of a pattern that ends with `{`.
 * @returns {string|null} Argument content, or `null` when unbalanced.
 */
function latexArgumentAfter(text, match) {
  return readLatexBraceGroup(text, match.index + match[0].length - 1);
}

/**
 * Reduce a LaTeX fragment to plain text: drop `\\` line breaks, control words
 * and grouping braces, unescape `\& \% \$ \# \_ \{ \}`, and collapse whitespace.
 *
 * @param {string} text - Raw LaTeX fragment.
 * @returns {string} Cleaned, trimmed text (possibly empty).
 */
function stripLatexMarkup(text) {
  return text
    .replace(/\\\\/g, ' ')
    .replace(/\\[a-zA-Z]+/g, '')
    .replace(/(?<!\\)[{}]/g, '')
    .replace(/\\([&%$#_{}])/g, '$1')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Extract document metadata from LaTeX source.
 *
 * Recognised constructs:
 * - `\title{...}`                  -> `title`
 * - `\authorOne[...]{...}`         -> `authors` (`{ name, affiliations }`)
 * - `\contribution[]{a, b; c}`     -> `affiliations` (`{ name }`), split on `,` / `;`
 * - `\date{...}` or `\newcommand{\date}{...}` -> `published`
 *
 * When no affiliations are found a fixed default list is used, and when no
 * usable date is found `published` is today's date formatted with the
 * `en-US` locale.
 *
 * @param {string} latexContent - LaTeX source; non-string input is treated as empty.
 * @returns {{title?: string, authors?: Array<{name: string, affiliations: number[]}>,
 *   affiliations: Array<{name: string}>, published: string}} Extracted metadata.
 */
export function extractLatexMetadata(latexContent) {
  const source = typeof latexContent === 'string' ? latexContent : '';
  const metadata = {};

  // Title: balanced-brace read so nested groups such as \textbf{...} survive.
  const titleMatch = source.match(/\\title\s*\{/);
  const rawTitle = titleMatch ? latexArgumentAfter(source, titleMatch) : null;
  const title = rawTitle === null ? '' : stripLatexMarkup(rawTitle);
  if (title) {
    metadata.title = title;
  }

  // Authors: one \authorOne[...]{...} entry per author.
  const authors = [];
  for (const match of source.matchAll(/\\authorOne\[[^\]]*\]\{/g)) {
    const fullAuthorInfo = latexArgumentAfter(source, match);
    if (fullAuthorInfo === null) {
      continue;
    }

    // A Set keeps each index once even when several markers share it.
    const indices = new Set();
    for (const [marker, index] of LATEX_AFFILIATION_MARKERS) {
      if (marker.test(fullAuthorInfo)) {
        indices.add(index);
      }
    }

    const name = stripLatexMarkup(fullAuthorInfo);
    // '...' is a template placeholder, not a real author.
    if (name && name !== '...') {
      authors.push({
        name,
        affiliations: indices.size > 0 ? [...indices] : [2],
      });
    }
  }
  if (authors.length > 0) {
    metadata.authors = authors;
  }

  // Affiliations: names listed in \contribution[]{...}, separated by , or ;
  const contributionMatch = source.match(/\\contribution\[\]\{/);
  const contributionText = contributionMatch
    ? latexArgumentAfter(source, contributionMatch)
    : null;
  if (contributionText !== null) {
    const affiliations = contributionText
      .split(/[,;]/)
      .map((part) => stripLatexMarkup(part))
      .filter((name) => name.length > 0)
      .map((name) => ({ name }));
    if (affiliations.length > 0) {
      metadata.affiliations = affiliations;
    }
  }

  if (!metadata.affiliations || metadata.affiliations.length === 0) {
    metadata.affiliations = [
      { name: "École Normale Supérieure Paris-Saclay" },
      { name: "University of Oxford" },
      { name: "Hugging Face" },
    ];
  }

  // Date: the first pattern that yields non-empty text wins. A date made only
  // of macros (e.g. \today) cleans to '' and falls through to the default.
  const datePrefixes = [
    /\\date\s*\{/,
    /\\newcommand\s*\{\\date\}\s*\{/,
  ];
  for (const prefix of datePrefixes) {
    const dateMatch = source.match(prefix);
    if (!dateMatch) {
      continue;
    }
    const rawDate = latexArgumentAfter(source, dateMatch);
    const published = rawDate === null ? '' : stripLatexMarkup(rawDate);
    if (published) {
      metadata.published = published;
      break;
    }
  }

  if (!metadata.published) {
    metadata.published = new Date().toLocaleDateString('en-US', {
      year: 'numeric',
      month: 'short',
      day: '2-digit',
    });
  }

  return metadata;
}
|
|
| |
| |
| |
| |
| |
/**
 * Render a value as a YAML double-quoted scalar.
 *
 * JSON string syntax is used because its escapes (`\"`, `\\`, `\n`, `\uXXXX`)
 * are also valid inside YAML double-quoted scalars, so quotes, backslashes
 * and newlines in the value cannot break the document.
 *
 * @param {*} value - Value to render; coerced with `String()`.
 * @returns {string} Quoted, escaped scalar.
 */
function yamlQuote(value) {
  return JSON.stringify(String(value));
}

/**
 * Build a YAML frontmatter block from a metadata object.
 *
 * Emits, in order and only when present: `title`, `authors`, `affiliations`,
 * `published`, `doi`, `description`, `licence` (folded block scalar) and
 * `tags`, followed by `tableOfContentsAutoCollapse: true`. The block is
 * delimited by `---` lines and ends with a blank line.
 *
 * @param {object} metadata - Metadata to serialise.
 * @param {string} [metadata.title]
 * @param {Array<{name: string, url?: string, affiliations?: number[]}>} [metadata.authors]
 * @param {Array<{name: string, url?: string}>} [metadata.affiliations]
 * @param {string} [metadata.published]
 * @param {string} [metadata.doi]
 * @param {string} [metadata.description]
 * @param {string} [metadata.licence]
 * @param {string[]} [metadata.tags]
 * @returns {string} Frontmatter text.
 */
export function generateFrontmatter(metadata) {
  const lines = ['---'];

  if (metadata.title) {
    lines.push(`title: ${yamlQuote(metadata.title)}`);
  }

  if (metadata.authors && metadata.authors.length > 0) {
    lines.push('authors:');
    for (const author of metadata.authors) {
      // Keys after the first must be indented past the "- " marker so they
      // stay inside the same list item.
      lines.push(`  - name: ${yamlQuote(author.name)}`);
      if (author.url) {
        lines.push(`    url: ${yamlQuote(author.url)}`);
      }
      const indices = Array.isArray(author.affiliations) ? author.affiliations : [];
      lines.push(`    affiliations: [${indices.join(', ')}]`);
    }
  }

  if (metadata.affiliations && metadata.affiliations.length > 0) {
    lines.push('affiliations:');
    for (const affiliation of metadata.affiliations) {
      lines.push(`  - name: ${yamlQuote(affiliation.name)}`);
      if (affiliation.url) {
        lines.push(`    url: ${yamlQuote(affiliation.url)}`);
      }
    }
  }

  if (metadata.published) {
    lines.push(`published: ${yamlQuote(metadata.published)}`);
  }

  if (metadata.doi) {
    lines.push(`doi: ${yamlQuote(metadata.doi)}`);
  }

  if (metadata.description) {
    lines.push(`description: ${yamlQuote(metadata.description)}`);
  }

  if (metadata.licence) {
    lines.push('licence: >');
    // Every line of a block scalar must carry the block's indentation.
    for (const licenceLine of String(metadata.licence).split(/\r?\n/)) {
      lines.push(`  ${licenceLine}`);
    }
  }

  if (metadata.tags && metadata.tags.length > 0) {
    lines.push('tags:');
    for (const tag of metadata.tags) {
      lines.push(`  - ${yamlQuote(tag)}`);
    }
  }

  // Two trailing empty entries produce the closing "---\n\n" after the join.
  lines.push('tableOfContentsAutoCollapse: true', '---', '', '');
  return lines.join('\n');
}
|
|
| |
| |
| |
| |
| |
/**
 * Convenience wrapper: extract metadata from LaTeX source and render it
 * as a frontmatter block in one call.
 *
 * @param {string} latexContent - LaTeX source passed to `extractLatexMetadata`.
 * @returns {string} The result of `generateFrontmatter` for that metadata.
 */
export function extractAndGenerateFrontmatter(latexContent) {
  return generateFrontmatter(extractLatexMetadata(latexContent));
}
|
|