pflaenz.li/pflaenzli/pflaenzli/utils/load_plz.py
2024-02-10 12:20:24 +01:00

66 lines
1.6 KiB
Python

import os
from urllib import request
import hashlib
import shutil
import pandas as pd
from pflaenzli.models import Plz
# Base name of the swisstopo "Ortschaftenverzeichnis" dataset; the zip
# extracts into a directory of this name.
FILENAME = "AMTOVZ_CSV_WGS84"
# Path of the CSV inside that extracted directory (relative to the CWD
# where get_index() unpacked the archive).
FILEPATH = os.path.join(FILENAME, f"{FILENAME}.csv")
def add_arguments(parser):
    """Register the ``--force`` flag (store_true) on the given parser."""
    parser.add_argument("--force", action="store_true", required=False)
def load_plz(apps, schema_editor, force=True):
    """Data-migration entry point: download the swisstopo PLZ index and
    (re)load the Plz table when the CSV changed or *force* is set.

    Args:
        apps: Django app registry (unused; required by the migration signature).
        schema_editor: Django schema editor (unused; required by the signature).
        force: when True (the default), recreate the objects even if the
            downloaded CSV is identical to the previously seen one.
    """
    get_index()
    # Evaluate hash_matches() unconditionally: the original short-circuit
    # (`force or not hash_matches()`) skipped it on forced runs, leaving a
    # stale hash on disk for subsequent non-forced runs.
    unchanged = hash_matches()
    if force or not unchanged:
        create_objects()
def hash_file(filename):
    """Return the SHA-1 hex digest of *filename*, read in 1 KiB chunks."""
    digest = hashlib.sha1()
    with open(filename, 'rb') as fh:
        # Stream the file so arbitrarily large inputs stay cheap on memory.
        while block := fh.read(1024):
            digest.update(block)
    return digest.hexdigest()
def get_index(url="https://data.geo.admin.ch/ch.swisstopo-vd.ortschaftenverzeichnis_plz/ortschaftenverzeichnis_plz/ortschaftenverzeichnis_plz_4326.csv.zip"):
    """Download the swisstopo PLZ zip archive and unpack it into the CWD."""
    archive = 'index.zip'
    request.urlretrieve(url, archive)
    shutil.unpack_archive(archive)
def hash_matches(file=FILEPATH, hashfile="hash"):
    """Compare *file*'s current SHA-1 against the one stored in *hashfile*.

    Returns True when the stored hash matches (file unchanged). Otherwise
    the new hash is written to *hashfile* and False is returned; on the
    first run (no hash file yet) the hash is recorded and False returned.
    """
    new_hash = hash_file(file)
    if os.path.exists(hashfile):
        with open(hashfile, 'r+') as f:
            old_hash = f.read()
            if old_hash == new_hash:
                return True
            # BUG FIX: after read() the position sits at EOF, and
            # truncate(0) does not move it — the original then wrote the
            # new hash after a run of NUL padding, so the stored hash
            # could never match again. Rewind before rewriting.
            f.seek(0)
            f.truncate()
            f.write(new_hash)
    else:
        with open(hashfile, 'w') as f:
            f.write(new_hash)
    return False
def create_objects(file=FILEPATH):
    """Replace the entire Plz table with the rows of the swisstopo CSV.

    NOTE(review): delete + bulk_create is not wrapped in a transaction
    here; a failure between the two leaves the table empty — confirm the
    caller (migration runner) provides atomicity.
    """
    df = pd.read_csv(file, sep=";")
    # itertuples() is substantially faster than iterrows() (no per-row
    # Series construction) and drops the unused `index` variable.
    objects = [
        Plz(plz=row.PLZ, lat=row.N, lon=row.E, name=row.Ortschaftsname)
        for row in df.itertuples(index=False)
    ]
    Plz.objects.all().delete()
    Plz.objects.bulk_create(objects)