# Forked from
# 2024 Competition / Bielefeld-CeBiTec
# 1160 commits behind the upstream repository.
# -
# Philip Mundt authored
# cit.py 10.48 KiB
# Requirements
# - Python 3.x
# - bibtexparser (install it with `pip install bibtexparser`)
# Usage: python3 cit.py -i bibtex.bib -s 1
import re
# Human-readable notes about entries that need manual attention. The HTML
# writer functions append to this list and main() prints it at the end of a run.
problemlist = []
def main():
    """Convert a BibTeX file into accessible HTML citation list items.

    Command-line arguments:
        -i / --input        path to the source ``.bib`` file (required)
        -s / --startnumber  number given to the first citation (required)

    Every supported entry (``article``, ``misc``, ``book``, ``inbook``) is
    rendered into ``output.txt`` in the current working directory. Entries
    that cannot be rendered are skipped and noted in the module-level
    ``problemlist``, which is printed once all entries were processed.

    Raises:
        SystemExit: with status 1 if ``bibtexparser`` is missing or the input
            cannot be read/parsed or the output cannot be written; argparse
            itself exits with status 2 on invalid arguments.
    """
    import argparse
    import io

    print("Starting program...")
    try:
        import bibtexparser
    except ImportError:
        print("The package 'bibtexparser' is not installed. Install with: pip install bibtexparser")
        raise SystemExit(1)

    # Reading command line input and parsing it. argparse reports bad input
    # and exits on its own, so no extra error handling is needed here.
    parser = argparse.ArgumentParser(
        prog='HTML Citations',
        description='create acessible HTML Citations from bib files')
    parser.add_argument('-i', '--input', required=True, help="path to source bib")
    parser.add_argument('-s', '--startnumber', required=True, type=int,
                        help="number from which to start numbering")
    args = parser.parse_args()
    print("Source: " + args.input)

    print("Reading file...")
    try:
        with open(args.input, 'r', encoding='utf-8') as file:
            file_content = file.read()
    except FileNotFoundError:
        print(f"Error: The file '{args.input}' was not found.")
        raise SystemExit(1)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error: The file '{args.input}' could not be read: {e}")
        raise SystemExit(1)

    print("Parsing file...")
    try:
        library = bibtexparser.loads(file_content)
    except Exception as e:
        # The parser's exception types are library specific; this is the
        # top-level boundary, so report and stop instead of guessing.
        print(f"Error: The file '{args.input}' could not be parsed: {e}")
        raise SystemExit(1)

    writers = {
        "article": articleHTML,
        "misc": miscHTML,
        "book": bookHTML,
        "inbook": bookHTML,
    }
    entries = library.entries
    print("found " + str(len(entries)) + " entries")

    try:
        with open('output.txt', 'w', encoding='utf-8') as out:
            # The number advances for every entry, including skipped ones.
            for number, entry in enumerate(entries, start=args.startnumber):
                print("\n Filling dictionary for entry " + str(number))
                dictio = {key.lower(): value for key, value in entry.items()}

                print("Checking Entry type of " + str(number))
                entry_type = entry.get('ENTRYTYPE', '')
                writer = writers.get(entry_type)
                if writer is None:
                    print(f"Unsupported entry type '{entry_type}', skipping entry {number}")
                    problemlist.append(
                        f"Entry {number} has unsupported type '{entry_type}' and was skipped")
                    continue

                # Render into a buffer first so that a failing entry never
                # leaves a half-written <li> element in the output file.
                buffer = io.StringIO()
                try:
                    writer(dictio, number, buffer)
                except Exception as e:
                    print(f"An unexpected error occurred in entry {number}: {e!r}")
                    problemlist.append(
                        f"Entry {number} could not be written and was skipped: {e!r}")
                    continue
                out.write(buffer.getvalue())
    except OSError as e:
        print(f"Error: 'output.txt' could not be written: {e}")
        raise SystemExit(1)

    if problemlist:
        print("- - - - - - - - - - - - - - - - - ")
        print("REMAINING ERRORS:")
        for problem in problemlist:
            print(problem)
    else:
        print("DONE")
def _format_author(name):
    """Return one non-empty author name formatted as ``"Last, F. M."``.

    Accepts both BibTeX spellings, ``"Last, First Middle"`` and
    ``"First Middle Last"``. A name without given names (for example an
    organisation written as a single word) is returned unchanged.
    """
    if "," in name:
        # Split on the first comma only, whether or not a space follows it.
        last, _, given = name.partition(",")
        given_parts = given.split()
    else:
        # Without a comma the final token is taken as the surname.
        *given_parts, last = name.split()
    last = last.strip()
    initials = ". ".join(part[0] for part in given_parts)
    return f"{last}, {initials}." if initials else last


def makeauthors(authors, out):
    """Write the author block of a citation to *out*.

    Args:
        authors: BibTeX ``author`` field; individual names are separated by
            ``" and "`` and the value may be wrapped over several lines.
        out: Writable text stream receiving the markup.

    At most ``max_authors`` names are written; if there are more, a final
    ``et al.`` item replaces the remaining ones. Empty name fragments are
    skipped with a console message.
    """
    max_authors = 7  # maximum number of authors that are spelled out

    # Collapse all whitespace (including line breaks) before splitting so
    # that a wrapped "...\n and ..." is still recognised as a separator.
    names = []
    for raw in " ".join(authors.split()).split(" and "):
        formatted = _format_author(raw.strip()) if raw.strip() else ""
        if formatted:
            names.append(formatted)
        else:
            print("Skipping empty author name in: " + authors)

    out.write("\t<span property=\"schema:author\" typeof=\"schema:Person\">\n")  # open author tag
    for name in names[:max_authors]:
        out.write(f"\t\t<span property=\"schema:Name\"> {name}</span>\n")
    if len(names) > max_authors:
        out.write("\t\t<span property=\"schema:Name\"> et al.</span>\n")
    out.write("\t</span>\n")  # close author tag
def articleHTML(dictio, x, out):
    """Write the ``<li>`` citation markup for an ``@article`` entry to *out*.

    Args:
        dictio: Entry fields keyed by lower-case BibTeX field name. The
            fields ``author``, ``title``, ``journal``, ``volume``, ``pages``,
            ``year`` and ``doi`` are used.
        x: Citation number; appears in the leading comment line and in the
            ``desc-<x>`` element id.
        out: Writable text stream receiving the markup.

    A missing or unusable field does not raise: its element is left out and
    a note is appended to the module-level ``problemlist``.
    """
    def report_missing(field):
        print(f"Sorry, no {field} information")
        problemlist.append(f"Check for missing {field} info at {x}")

    print("Writing html code for article " + str(x) + "...")
    out.write("{/*<!-- Citation num " + str(x) + "--> */}\n")
    out.write("<li typeof=\"schema:ScholarlyArticle\" role=\"doc-biblioentry\" "
              "property=\"schema:citation\" id=\"desc-" + str(x) + "\">\n")

    print("Just a sec, separating authors...")
    authors = dictio.get('author')
    if authors:
        makeauthors(authors, out)
    else:
        report_missing("author")

    title = dictio.get('title')
    if title:
        # BibTeX braces only protect capitalisation; drop them from the output.
        title = title.replace('{', '').replace('}', '')
        out.write("\t<span property=\"schema:name\"> " + title + "</span>. \n")
    else:
        report_missing("title")

    journal = dictio.get('journal')
    if journal:
        out.write("\t<i property=\"schema:publisher\" typeof=\"schema:Organization\"> "
                  + journal + "</i>\n")
    else:
        report_missing("journal")

    volume = dictio.get('volume')
    if volume:
        out.write("\t<b property=\"issueNumber\" typeof=\"PublicationIssue\"> "
                  + volume + "</b>\n")
    else:
        report_missing("volume")

    print("Getting pages...")
    pages = (dictio.get('pages') or '').strip()
    if not pages:
        report_missing("page")
    else:
        # Split once on a run of hyphens, an en dash or an em dash.
        parts = [part.strip() for part in re.split("[-\u2013\u2014]+", pages, maxsplit=1)]
        begin = parts[0]
        end = parts[1] if len(parts) > 1 else ''
        if begin and end:
            out.write("\t, <span property=\"schema:pageBegin\"> " + begin
                      + "</span>-<span property=\"schema:pageEnd\">" + end + "</span> \n")
        elif re.fullmatch(r'\d+', begin or end):
            # Single page (also covers a dangling dash such as "123-").
            out.write("\t, <span property=\"schema:pageBegin\">" + (begin or end)
                      + "</span> \n")
        else:
            # Page information is not numeric; treat it as missing.
            print(f"Non-numeric page information detected ('{pages}'). Treating as missing.")
            problemlist.append(f"Non-numeric page info at entry {x}")

    year = dictio.get('year')
    if year:
        out.write("\t(<time property=\"schema:datePublished\" datatype=\"xsd:gYear\" "
                  "dateTime=\"" + year + "\">" + year + "</time>).\n")
    else:
        report_missing("year")

    doi = (dictio.get('doi') or '').strip()
    # Some .bib files store the DOI as a full URL; keep only the identifier
    # so the resolver prefix is not duplicated in the link.
    doi = re.sub(r'^https?://(dx\.)?doi\.org/', '', doi, flags=re.IGNORECASE)
    if doi:
        out.write("\t<a className=\"doi\" href=\"https://doi.org/" + doi
                  + "\"> doi: " + doi + "</a>\n")
    else:
        report_missing("doi")

    out.write("</li>\n\n")
def miscHTML(dictio, x, out):
    """Write the ``<li>`` citation markup for a ``@misc`` entry to *out*.

    Args:
        dictio: Entry fields keyed by lower-case BibTeX field name. The
            fields ``author``, ``title``, ``howpublished`` and ``year`` are
            used.
        x: Citation number; appears in the leading comment line and in the
            ``desc-<x>`` element id.
        out: Writable text stream receiving the markup.

    A missing field does not raise: its element is left out and a note is
    appended to the module-level ``problemlist``.
    """
    def report_missing(field):
        print(f"Sorry, no {field} information")
        problemlist.append(f"Check for missing {field} info at {x}")

    def unbrace(text):
        # BibTeX braces only protect capitalisation; drop them from the output.
        return text.replace('{', '').replace('}', '')

    print("Writing html code for entry " + str(x) + "...")
    out.write("{/*<!-- Citation num " + str(x) + "--> */}\n")
    out.write("<li typeof=\"schema:WebPage\" role=\"doc-biblioentry\" "
              "property=\"schema:citation\" id=\"desc-" + str(x) + "\">\n")

    author = dictio.get('author')
    if author:
        # The author is written verbatim as one organisation rather than
        # being split into personal names.
        out.write("\t<span property=\"schema:author\" typeof=\"schema:Organization\">\n")
        out.write("\t\t<span property=\"schema:Name\"> " + unbrace(author) + "</span>.\n")
        out.write("\t</span>\n")
    else:
        report_missing("author")

    title = dictio.get('title')
    if title:
        out.write("\t<span property=\"schema:name\">" + unbrace(title) + ".</span>\n")
    else:
        report_missing("title")

    howpublished = dictio.get('howpublished')
    if howpublished:
        out.write("\t<i property=\"schema:publisher\" typeof=\"schema:Organization\">"
                  + howpublished + "</i>\n")
    else:
        report_missing("howpublished")

    year = dictio.get('year')
    if year:
        out.write("\t (<time property=\"schema:datePublished\" datatype=\"xsd:gYear\" "
                  "dateTime=\"" + year + "\">" + year + "</time>).\n")
    else:
        report_missing("year")

    out.write("</li>\n\n")
def bookHTML(dictio, x, out):
    """Write the ``<li>`` citation markup for a ``@book``/``@inbook`` entry.

    Args:
        dictio: Entry fields keyed by lower-case BibTeX field name. The
            fields ``author``, ``title`` (or ``booktitle`` as fallback),
            ``publisher`` and ``year`` are used.
        x: Citation number; appears in the leading comment line and in the
            ``desc-<x>`` element id.
        out: Writable text stream receiving the markup.

    A missing field does not raise: its element is left out and a note is
    appended to the module-level ``problemlist``.
    """
    def report_missing(field):
        print(f"Sorry, no {field} information")
        problemlist.append(f"Check for missing {field} info at {x}")

    print("Writing html code for entry " + str(x) + "...")
    out.write("{/*<!-- Citation num " + str(x) + "--> */}\n")
    out.write("<li typeof=\"schema:Book\" role=\"doc-biblioentry\" "
              "property=\"schema:citation\" id=\"desc-" + str(x) + "\">\n")

    print("Just a sec, separating authors...")
    authors = dictio.get('author')
    if authors:
        makeauthors(authors, out)
    else:
        report_missing("author")

    # Prefer the entry's own title; fall back to booktitle when it is absent
    # or empty.
    title = dictio.get('title') or dictio.get('booktitle')
    if title:
        # BibTeX braces only protect capitalisation; drop them from the output.
        title = title.replace('{', '').replace('}', '')
        out.write("\t<span property=\"schema:name\"> " + title + ".</span>\n")
    else:
        print(f"No title or booktitle found for entry {x}")
        problemlist.append(f"Check for missing title or booktitle at entry {x}")

    publisher = dictio.get('publisher')
    if publisher:
        out.write("\t<i property=\"schema:publisher\" typeof=\"schema:Organization\"> "
                  + publisher + "</i>\n")
    else:
        report_missing("publisher")

    year = dictio.get('year')
    if year:
        out.write("\t (<time property=\"schema:datePublished\" datatype=\"xsd:gYear\" "
                  "dateTime=\"" + year + "\">" + year + "</time>).\n")
    else:
        report_missing("year")

    out.write("</li>\n\n")
# Run the converter only when executed as a script, so that importing this
# module does not trigger command-line parsing and file output.
if __name__ == "__main__":
    main()