Skip to content
Snippets Groups Projects
cit.py 10.8 KiB
Newer Older
Liliana Sanfilippo's avatar
Liliana Sanfilippo committed
# Requirements
# - Python 3.x
# - bibtexparser (install it with `pip install bibtexparser`)
Liliana Sanfilippo's avatar
Liliana Sanfilippo committed
# use like:  python3 cit.py -i bibtex.bib -s 1
Liliana Sanfilippo's avatar
Liliana Sanfilippo committed
import re
Liliana Sanfilippo's avatar
Liliana Sanfilippo committed
# Module-wide list of human-readable notes about entries that need manual
# follow-up (e.g. missing page or DOI information). The HTML writer functions
# append to it and main() prints it once all entries have been processed.
problemlist = []

def main():
    """Convert a BibTeX file into HTML citation list items in ``output.txt``.

    Usage: ``python3 cit.py -i bibtex.bib -s 1``

    Command-line arguments:
        -i / --input:        path to the source ``.bib`` file.
        -s / --startnumber:  integer number given to the first entry.

    Every entry of type ``article``, ``misc``, ``book`` or ``inbook`` is
    rendered by the matching writer function. Entries of any other type, and
    entries whose writer fails, are skipped and reported through the
    module-level ``problemlist``; the remaining entries are still processed.
    A skipped entry still consumes its citation number, so numbers stay tied
    to the position of the entry in the ``.bib`` file.

    Raises:
        SystemExit: with status 1 if ``bibtexparser`` is not installed or the
            input cannot be read or parsed or the output cannot be opened;
            raised by ``argparse`` itself if the command line is invalid.
    """
    print("Starting program...")
    try:
        import bibtexparser
    except ImportError:
        print("The package 'bibtexparser' is not installed. Install with: pip install bibtexparser")
        raise SystemExit(1)
    # argparse and io are part of the standard library; no import guard needed.
    import argparse
    import io

    # Reading command line input and parsing it. parse_args() prints a usage
    # message and exits on its own when the arguments are invalid.
    parser = argparse.ArgumentParser(
        prog='HTML Citations',
        description='create acessible HTML Citations from bib files')
    parser.add_argument('-i', '--input', required=True, help="path to source bib")
    # type=int makes argparse reject a non-numeric start number up front.
    parser.add_argument('-s', '--startnumber', required=True, type=int,
                        help="number from which to start numbering")
    args = parser.parse_args()
    print("Source: " + args.input)

    print("Reading file...")
    try:
        # NOTE(review): assumes the .bib file is UTF-8 encoded.
        with open(args.input, 'r', encoding='utf-8') as file:
            file_content = file.read()
    except FileNotFoundError:
        print(f"Error: The file '{args.input}' was not found.")
        raise SystemExit(1)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error: The file '{args.input}' could not be read: {e}")
        raise SystemExit(1)

    print("Parsing file...")
    try:
        library = bibtexparser.loads(file_content)
    except Exception as e:
        # Top-level boundary: report any parser failure and stop.
        print(f"An unexpected error occurred while parsing '{args.input}': {e}")
        raise SystemExit(1)

    # Entry type -> function that writes the HTML for that type.
    writers = {
        "article": articleHTML,
        "misc": miscHTML,
        "book": bookHTML,
        "inbook": bookHTML,
    }

    entries = library.entries
    print("found " + str(len(entries)) + " entries")

    try:
        out = open('output.txt', 'w', encoding='utf-8')
    except OSError as e:
        print(f"Error: The output file 'output.txt' could not be opened: {e}")
        raise SystemExit(1)

    with out:
        for number, entry in enumerate(entries, start=args.startnumber):
            print("\n Filling dictionary for entry " + str(number) + "")
            # Field names are looked up in lower case by the writers.
            dictio = {key.lower(): value for key, value in entry.items()}

            print("Checking Entry type of " + str(number) + "")
            entry_type = entry.get('ENTRYTYPE', '')
            writer = writers.get(entry_type)
            if writer is None:
                print(f"Entry type '{entry_type}' is not supported, skipping entry {number}")
                problemlist.append(
                    f"Unsupported entry type '{entry_type}' at entry {number} (nothing written)")
                continue

            # Render into a buffer first so that an entry which fails half-way
            # never leaves an unclosed <li> in the output file.
            buffer = io.StringIO()
            try:
                writer(dictio, number, buffer)
            except Exception as e:
                print(f"An unexpected error occurred while writing entry {number}: {e!r}")
                problemlist.append(f"Entry {number} could not be written: {e!r}")
                continue
            out.write(buffer.getvalue())

    if problemlist:
        print("- - - - - - - - - - - - - - - - - ")
        print("REMAINING ERRORS:")
        for p in problemlist:
            print(p)
    else:
        print("DONE")


Liliana Sanfilippo's avatar
Liliana Sanfilippo committed
def _format_author(raw):
    """Return one BibTeX author name as ``Last, F. M.``.

    A name consisting of a single part, or wrapped in one pair of braces, is
    returned verbatim (without the braces). An empty name yields ``""``.
    """
    name = raw.strip()
    # A name wrapped in a single pair of braces is a literal (typically
    # corporate) author in BibTeX and must not be split into first/last parts.
    if (len(name) >= 2 and name[0] == "{" and name[-1] == "}"
            and not re.search(r"[{}]", name[1:-1])):
        return name[1:-1].strip()

    if "," in name:
        # "Last, First Middle" (or "Last, Jr, First"): the family name is the
        # first comma-separated part, the given names are the final one.
        parts = [part.strip() for part in name.split(",")]
        last = parts[0]
        given = parts[-1].split()
    else:
        # "First Middle Last": the final word is the family name.
        words = name.split()
        if not words:
            return ""
        last = words[-1]
        given = words[:-1]

    if not given:
        # Single-part name: there are no initials to add.
        return last
    initials = ". ".join(word[0] for word in given) + "."
    return last + ", " + initials


def makeauthors(authors, out):
    """Write one ``schema:Name`` span per author of a BibTeX author field.

    Args:
        authors: Raw BibTeX author string; individual authors are separated
            by the word ``and`` surrounded by whitespace (spaces or newlines).
        out: Writable text stream that receives the markup.

    Each author is written as ``Last, F. M.`` on its own line, indented by two
    tabs. At most six authors are written; if there are more, or if the list
    ends with the BibTeX placeholder ``others``, a final ``et al.`` span is
    written instead of the remaining names. Empty names are skipped.
    """
    # Maximum number of authors to write before "et al."
    max_authors = 6
    et_al = "\t" + "\t" + "<span property=\"schema:Name\"> et al.</span>" + "\n"

    written = 0
    for raw in re.split(r"\s+and\s+", authors.strip()):
        raw = raw.strip()
        if not raw:
            continue
        # "others" is BibTeX's own spelling of "et al."
        if written == max_authors or raw == "others":
            out.write(et_al)
            break
        name = _format_author(raw)
        if not name:
            continue
        out.write("\t" + "\t" + "<span property=\"schema:Name\"> " + name + "</span>" + "\n")
        written += 1
Liliana Sanfilippo's avatar
Liliana Sanfilippo committed

Liliana Sanfilippo's avatar
Liliana Sanfilippo committed

Liliana Sanfilippo's avatar
Liliana Sanfilippo committed


Liliana Sanfilippo's avatar
Liliana Sanfilippo committed
def articleHTML(dictio, x, out):
    """Write the HTML list item for a BibTeX ``article`` entry.

    Args:
        dictio: Entry fields keyed by lower-case field name. The fields read
            are ``author``, ``title``, ``journal``, ``volume``, ``pages``,
            ``year`` and ``doi``.
        x: Citation number, used in the leading comment and the ``id``
            (``desc-<x>``) of the list item.
        out: Writable text stream that receives the markup.

    A field that is missing or empty does not raise: its element is left out
    and a note is appended to the module-level ``problemlist``. Page
    information that is neither a range nor a plain number is reported the
    same way.
    """
    def missing(field):
        # Report an absent field on the console and in the final summary.
        print("Sorry, no " + field + " information")
        problemlist.append("Check for missing " + field + " info at " + str(x))

    print("Writing html code for article " + str(x) + "...")
    out.write("{/*<!-- Citation num " + str(x) + "--> */}" + "\n")
    # "ScholarlyArticle" is the schema.org spelling of the type.
    out.write("<li typeof=\"schema:ScholarlyArticle\" role=\"doc-biblioentry\" property=\"schema:citation\" id=\"desc-" + str(x) + "\">" + "\n")
    out.write("\t" + "<span property=\"schema:author\" typeof=\"schema:Person\">" + "\n")

    print("Just a sec, separating authors...")
    authors = dictio.get('author')
    if authors:
        makeauthors(authors, out)
    else:
        missing("author")
    out.write("\t" + "</span>" + "\n")

    # Braces only protect capitalisation in BibTeX; they are not part of the title.
    title = dictio.get('title', '').replace('{', '').replace('}', '')
    if title:
        out.write("\t" + "<span property=\"schema:name\">&nbsp;" + title + "</span>. " + "\n")
    else:
        missing("title")

    journal = dictio.get('journal')
    if journal:
        out.write("\t" + "<i property=\"schema:publisher\" typeof=\"schema:Organization\"> " + journal + "</i>" + "\n")
    else:
        missing("journal")

    volume = dictio.get('volume')
    if volume:
        out.write("\t" + "<b property=\"issueNumber\" typeof=\"PublicationIssue\"> " + volume + "</b>,&nbsp;" + "\n")
    else:
        missing("volume")

    print("Getting pages...")
    pages = (dictio.get('pages') or '').strip()
    if not pages:
        missing("page")
    else:
        # Split on "-", "--" (any run of hyphens) or an en dash and drop empty
        # pieces, so "12--34", "12-34" and "12–34" all yield ["12", "34"].
        parts = [p.strip() for p in re.split(r'-+|–', pages)]
        parts = [p for p in parts if p]
        if len(parts) >= 2:
            out.write("\t" + "<span property=\"schema:pageBegin\"> " + parts[0] + "</span>-<span property=\"schema:pageEnd\">" + parts[1] + "</span>&nbsp;" + "\n")
        elif len(parts) == 1 and re.fullmatch(r'\d+', parts[0]):
            # A single numeric page has a beginning but no end.
            out.write("\t" + "<span property=\"schema:pageBegin\">" + parts[0] + "</span>&nbsp;" + "\n")
        else:
            # Page information is not numeric (e.g. an article number): treat as missing.
            print(f"Non-numeric page information detected ('{pages}'). Treating as missing.")
            problemlist.append(f"Non-numeric page info at entry {x}")

    year = dictio.get('year')
    if year:
        # No padding inside the dateTime attribute, matching the other writers.
        out.write("\t" + "(<time property=\"schema:datePublished\" datatype=\"xsd:gYear\" dateTime=\"" + year + "\">" + year + "</time>)." + "\n")
    else:
        missing("year")

    doi = dictio.get('doi')
    if doi:
        out.write("\t" + "<a className=\"doi\" href=\"https://doi.org/" + doi + "\"> doi: " + doi + "</a>" + "\n")
    else:
        missing("doi")

    out.write("</li>" + "\n" + "\n")


def miscHTML(dictio, x, out):
    """Write the HTML list item for a BibTeX ``misc`` entry.

    Args:
        dictio: Entry fields keyed by lower-case field name. The fields read
            are ``author``, ``title``, ``howpublished`` and ``year``.
        x: Citation number, used in the leading comment and the ``id``
            (``desc-<x>``) of the list item.
        out: Writable text stream that receives the markup.

    The author is written verbatim as a single organization name. A field that
    is missing or empty does not raise: its element is left out and a note is
    appended to the module-level ``problemlist``.
    """
    def missing(field):
        # Report an absent field on the console and in the final summary.
        print("Sorry, no " + field + " information")
        problemlist.append("Check for missing " + field + " info at " + str(x))

    def field_text(key):
        # Braces only protect text in BibTeX; they are not part of the value.
        return dictio.get(key, '').replace('{', '').replace('}', '').strip()

    print("Writing html code for entry " + str(x) + "...")
    out.write("{/*<!-- Citation num " + str(x) + "--> */}" + "\n")
    out.write("<li typeof=\"schema:WebPage\" role=\"doc-biblioentry\" property=\"schema:citation\" id=\"desc-" + str(x) + "\">" + "\n")
    # "Organization" is the schema.org spelling, as already used for the publisher below.
    out.write("\t" + "<span property=\"schema:author\" typeof=\"schema:Organization\">" + "\n")
    aut = field_text('author')
    if aut:
        out.write("\t" + "\t" + "<span property=\"schema:Name\"> " + aut + "</span>." + "\n")
    else:
        missing("author")
    out.write("\t" + "</span>" + "\n")

    title = field_text('title')
    if title:
        out.write("\t" + "<span property=\"schema:name\">" + title + ".</span>" + "\n")
    else:
        missing("title")

    howpublished = dictio.get('howpublished')
    if howpublished:
        out.write("\t" + "<i property=\"schema:publisher\" typeof=\"schema:Organization\">" + howpublished + "</i>" + "\n")
    else:
        missing("howpublished")

    year = dictio.get('year')
    if year:
        out.write("\t" + "&nbsp;(<time property=\"schema:datePublished\" datatype=\"xsd:gYear\" dateTime=\"" + year + "\">" + year + "</time>)." + "\n")
    else:
        missing("year")

    out.write("</li>" + "\n" + "\n")

Liliana Sanfilippo's avatar
Liliana Sanfilippo committed
def bookHTML(dictio, x, out):
    """Write the HTML list item for a BibTeX ``book`` or ``inbook`` entry.

    Args:
        dictio: Entry fields keyed by lower-case field name. The fields read
            are ``author``, ``title`` (falling back to ``booktitle``),
            ``publisher`` and ``year``.
        x: Citation number, used in the leading comment and the ``id``
            (``desc-<x>``) of the list item.
        out: Writable text stream that receives the markup.

    A field that is missing or empty does not raise: its element is left out
    and a note is appended to the module-level ``problemlist``.
    """
    def missing(field):
        # Report an absent field on the console and in the final summary.
        print("Sorry, no " + field + " information")
        problemlist.append("Check for missing " + field + " info at " + str(x))

    print("Writing html code for entry " + str(x) + "...")
    out.write("{/*<!-- Citation num " + str(x) + "--> */}" + "\n")
    out.write("<li typeof=\"schema:Book\" role=\"doc-biblioentry\" property=\"schema:citation\" id=\"desc-" + str(x) + "\">" + "\n")
    # makeauthors() writes personal names, so the author is typed as a Person,
    # as in articleHTML().
    out.write("\t" + "<span property=\"schema:author\" typeof=\"schema:Person\">" + "\n")
    print("Just a sec, separating authors...")
    authors = dictio.get('author')
    if authors:
        makeauthors(authors, out)
    else:
        missing("author")
    out.write("\t" + "</span>" + "\n")

    # Prefer the entry's own title; fall back to the title of the containing book.
    title = dictio.get('title') or dictio.get('booktitle') or ''
    # Braces only protect capitalisation in BibTeX; they are not part of the title.
    title = title.replace('{', '').replace('}', '')
    if title:
        out.write("\t" + "<span property=\"schema:name\">&nbsp;" + title + ".</span>" + "\n")
    else:
        print(f"No title or booktitle found for entry {x}")
        problemlist.append(f"Check for missing title or booktitle at entry {x}")

    publisher = dictio.get('publisher')
    if publisher:
        out.write("\t" + "<i property=\"schema:publisher\" typeof=\"schema:Organization\">&nbsp;" + publisher + "</i>" + "\n")
    else:
        missing("publisher")

    year = dictio.get('year')
    if year:
        out.write("\t" + "&nbsp;(<time property=\"schema:datePublished\" datatype=\"xsd:gYear\" dateTime=\"" + year + "\">" + year + "</time>)." + "\n")
    else:
        missing("year")

    out.write("</li>" + "\n" + "\n")

Liliana Sanfilippo's avatar
Liliana Sanfilippo committed
# Run the command-line tool only when executed as a script, so importing this
# module (e.g. to reuse the writer functions) has no side effects.
if __name__ == "__main__":
    main()