# Requirements
# - Python 3.x
# - bibtexparser (install it with `pip install bibtexparser`)
#
# use like: python3 cit.py -i bibtex.bib -s 1
"""Convert a BibTeX file into accessible, schema.org-annotated HTML citations.

Every supported entry (article, misc, book, inbook) is written as one
``<li>`` element to the output file.  Fields that are missing or cannot be
interpreted do not abort the run; they are collected in ``problemlist`` and
printed as a summary at the end.
"""
import argparse
import re
import sys

# Notes about entries that need manual follow-up.  Filled by the writer
# functions below and printed by main() once all entries are processed.
problemlist = []

# BibTeX separates authors with the word "and", surrounded by any whitespace
# (including line breaks inside a wrapped author field).
_AUTHOR_SEPARATOR = re.compile(r"\s+and\s+")

# Page ranges are separated by one or more hyphens, an en dash or an em dash.
_PAGE_SEPARATOR = re.compile(r"\s*[-\u2013\u2014]+\s*")


def main():
    """Parse the command line, read the .bib file and write the HTML output.

    Exits with status 1 when bibtexparser is missing, the input cannot be
    read or parsed, or the output cannot be written.
    """
    print("Starting program...")
    # Imported here so that the formatting helpers in this module stay usable
    # (and testable) on systems where bibtexparser is not installed.
    try:
        import bibtexparser
    except ImportError:
        print("The package 'bibtexparser' is not installed. Install with: pip install bibtexparser")
        sys.exit(1)

    # argparse reports bad or missing arguments itself and exits.
    parser = argparse.ArgumentParser(
        prog='HTML Citations',
        description='create accessible HTML Citations from bib files')
    parser.add_argument('-i', '--input', required=True, help="path to source bib")
    # type=int rejects a non-numeric start number before any file is touched.
    parser.add_argument('-s', '--startnumber', required=True, type=int,
                        help="number from which to start numbering")
    parser.add_argument('-o', '--output', default='output.txt',
                        help="path of the file to write (default: output.txt)")
    args = parser.parse_args()
    print("Source: " + args.input)

    print("Reading file...")
    try:
        with open(args.input, 'r', encoding='utf-8') as file:
            file_content = file.read()
    except FileNotFoundError:
        print(f"Error: The file '{args.input}' was not found.")
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error: The file '{args.input}' could not be read: {e}")
        sys.exit(1)

    print("Parsing file...")
    try:
        library = bibtexparser.loads(file_content)
    except Exception as e:  # top-level boundary: report any parser failure
        print(f"An unexpected error occurred while parsing '{args.input}': {e}")
        sys.exit(1)

    entries = library.entries
    print("found " + str(len(entries)) + " entries")
    try:
        with open(args.output, 'w', encoding='utf-8') as out:
            # Every entry consumes one citation number, supported or not.
            for number, entry in enumerate(entries, start=args.startnumber):
                _write_entry(entry, number, out)
    except OSError as e:
        print(f"Error: The file '{args.output}' could not be written: {e}")
        sys.exit(1)

    if problemlist:
        print("- - - - - - - - - - - - - - - - - ")
        print("REMAINING ERRORS:")
        for p in problemlist:
            print(p)
    else:
        print("DONE")


def _write_entry(entry, number, out):
    """Normalise one parsed entry and hand it to the writer for its type."""
    print("\n Initializing empty dictionary for entry " + str(number) + "...")
    print("Filling dictionary for entry " + str(number) + "")
    # Lower-case the keys so the writers can use one spelling per field.
    dictio = {key.lower(): value for key, value in entry.items()}

    print("Checking Entry type of " + str(number) + "")
    entry_type = str(dictio.get('entrytype', '')).lower()
    writer = _ENTRY_WRITERS.get(entry_type)
    if writer is None:
        print(f"Entry type '{entry_type}' is not supported, skipping entry {number}")
        problemlist.append(f"Unsupported entry type '{entry_type}' at entry {number}")
        return
    writer(dictio, number, out)


def _require(dictio, key, x, label=None):
    """Return ``dictio[key]``, or None after recording a problem for entry x.

    A field that is absent or blank is reported once in ``problemlist``;
    ``label`` is the word used for the field in that report (default: key).
    """
    label = label or key
    value = dictio.get(key)
    if value is None or not str(value).strip():
        print(f"Sorry, no {label} information")
        problemlist.append(f"Check for missing {label} info at {x}")
        return None
    return value


def _strip_braces(text):
    """Remove BibTeX protection braces, which would break the JSX output."""
    return text.replace('{', '').replace('}', '')


def _format_author(raw):
    """Return one author as "Last, F. M.", or None if the name is empty.

    Accepts "Last, First Middle", "Last,First" and "First Middle Last".
    A name consisting of a single word is returned unchanged.
    """
    raw = raw.strip()
    if not raw:
        return None
    if ',' in raw:
        parts = [p.strip() for p in raw.split(',')]
        last = parts[0]
        # With "Last, Jr, First" the given names are the final part.
        given = parts[-1].split()
    else:
        *given, last = raw.split()
    # Only the initials of the given names are used.
    initials = '. '.join(n[0] for n in given) + '.' if given else ''
    return ', '.join(p for p in (last, initials) if p) or None


def makeauthors(authors, out, max_authors=6):
    """Write one ``<span>`` per author of a BibTeX author field to ``out``.

    Args:
        authors: the raw author field, names separated by "and".
        out: writable text stream receiving the HTML.
        max_authors: number of names written before the list is cut off
            with "et al.".

    The BibTeX placeholder "others" is also rendered as "et al.".
    """
    names = []
    truncated = False
    for raw in _AUTHOR_SEPARATOR.split(authors.strip()):
        if raw.strip().lower() == 'others':
            truncated = True
            continue
        name = _format_author(raw)
        if name is None:
            print(f"Skipping empty author name in: {authors!r}")
            continue
        names.append(name)

    for name in names[:max_authors]:
        out.write("\t" + "\t" + "<span property=\"schema:Name\"> " + name + "</span>" + "\n")
    # More names than we print, or an explicit "and others": add "et al.".
    if truncated or len(names) > max_authors:
        out.write("\t" + "\t" + "<span property=\"schema:Name\"> et al.</span>" + "\n")


def _write_header(out, x, schema_type):
    """Write the JSX comment and the opening ``<li>`` of citation x."""
    out.write("{/*<!-- Citation num " + str(x) + "--> */}" + "\n")
    out.write(f'<li typeof="schema:{schema_type}" role="doc-biblioentry" '
              f'property="schema:citation" id="desc-{x}">\n')


def _write_year(dictio, x, out, prefix=""):
    """Write the publication year as a ``<time>`` element, if present."""
    year = _require(dictio, 'year', x)
    if year is not None:
        out.write(f'\t{prefix}(<time property="schema:datePublished" '
                  f'datatype="xsd:gYear" dateTime="{year}">{year}</time>).\n')


def _write_pages(dictio, x, out):
    """Write the page or page range of entry x, or record why it is skipped."""
    pages = _require(dictio, 'pages', x, label='page')
    if pages is None:
        return
    # Dropping empty pieces copes with input such as "12-" or "---".
    parts = [p for p in _PAGE_SEPARATOR.split(pages.strip()) if p]
    if len(parts) >= 2:
        out.write("\t" + "<span property=\"schema:pageBegin\"> " + parts[0]
                  + "</span>-<span property=\"schema:pageEnd\">" + parts[1] + "</span> " + "\n")
    elif len(parts) == 1 and re.fullmatch(r'\d+', parts[0]):
        out.write("\t" + "<span property=\"schema:pageBegin\">" + parts[0] + "</span> " + "\n")
    else:
        # Page information is not numeric: treat it as missing.
        print(f"Non-numeric page information detected ('{pages}'). Treating as missing.")
        problemlist.append(f"Non-numeric page info at entry {x}")


def articleHTML(dictio, x, out):
    """Write citation number x for a journal article to ``out``.

    Reads author, title, journal, volume, pages, year and doi from
    ``dictio``; each missing field is skipped and noted in ``problemlist``.
    """
    print("Writing html code for article " + str(x) + "...")
    _write_header(out, x, "ScholarlyArticle")
    out.write("\t" + "<span property=\"schema:author\" typeof=\"schema:Person\">" + "\n")
    print("Just a sec, separating authors...")
    authors = _require(dictio, 'author', x)
    if authors is not None:
        makeauthors(authors, out)
    out.write("\t" + "</span>" + "\n")

    title = _require(dictio, 'title', x)
    if title is not None:
        out.write("\t" + "<span property=\"schema:name\"> " + _strip_braces(title) + "</span>. " + "\n")
    journal = _require(dictio, 'journal', x)
    if journal is not None:
        out.write("\t" + "<i property=\"schema:publisher\" typeof=\"schema:Organization\"> " + journal + "</i>" + "\n")
    volume = _require(dictio, 'volume', x)
    if volume is not None:
        out.write("\t" + "<b property=\"issueNumber\" typeof=\"PublicationIssue\"> " + volume + "</b>, " + "\n")

    print("Getting pages...")
    _write_pages(dictio, x, out)
    _write_year(dictio, x, out)

    doi = _require(dictio, 'doi', x)
    if doi is not None:
        out.write("\t" + "<a className=\"doi\" href=\"https://doi.org/" + doi + "\"> doi: " + doi + "</a>" + "\n")
    out.write("</li>" + "\n" + "\n")


def miscHTML(dictio, x, out):
    """Write citation number x for a misc entry (rendered as a web page).

    Reads author, title, howpublished and year from ``dictio``; the author
    field is written verbatim. Missing fields are noted in ``problemlist``.
    """
    print("Writing html code for entry " + str(x) + "...")
    _write_header(out, x, "WebPage")
    out.write("\t" + "<span property=\"schema:author\" typeof=\"schema:Organization\">" + "\n")
    aut = _require(dictio, 'author', x)
    if aut is not None:
        out.write("\t" + "\t" + "<span property=\"schema:Name\"> " + aut + "</span>." + "\n")
    out.write("\t" + "</span>" + "\n")

    title = _require(dictio, 'title', x)
    if title is not None:
        out.write("\t" + "<span property=\"schema:name\">" + _strip_braces(title) + ".</span>" + "\n")
    howpublished = _require(dictio, 'howpublished', x)
    if howpublished is not None:
        out.write("\t" + "<i property=\"schema:publisher\" typeof=\"schema:Organization\">" + howpublished + "</i>" + "\n")
    _write_year(dictio, x, out, prefix=" ")
    out.write("</li>" + "\n" + "\n")


def bookHTML(dictio, x, out):
    """Write citation number x for a book or inbook entry to ``out``.

    Reads author, title (falling back to booktitle), publisher and year
    from ``dictio``; missing fields are noted in ``problemlist``.
    """
    print("Writing html code for entry " + str(x) + "...")
    _write_header(out, x, "Book")
    # NOTE(review): the authors are formatted like person names but typed as
    # an organization here, unlike articleHTML - confirm which is intended.
    out.write("\t" + "<span property=\"schema:author\" typeof=\"schema:Organization\">" + "\n")
    print("Just a sec, separating authors...")
    authors = _require(dictio, 'author', x)
    if authors is not None:
        makeauthors(authors, out)
    out.write("\t" + "</span>" + "\n")

    title = dictio.get('title') or dictio.get('booktitle')
    if title:
        out.write("\t" + "<span property=\"schema:name\"> " + _strip_braces(title) + ".</span>" + "\n")
    else:
        print(f"No title or booktitle found for entry {x}")
        problemlist.append(f"Check for missing title or booktitle at entry {x}")

    publisher = _require(dictio, 'publisher', x)
    if publisher is not None:
        out.write("\t" + "<i property=\"schema:publisher\" typeof=\"schema:Organization\"> " + publisher + "</i>" + "\n")
    _write_year(dictio, x, out, prefix=" ")
    out.write("</li>" + "\n" + "\n")


# Maps a lower-cased BibTeX entry type to the function that renders it.
_ENTRY_WRITERS = {
    "article": articleHTML,
    "misc": miscHTML,
    "book": bookHTML,
    "inbook": bookHTML,
}


# Only run when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()