From b8547cd87703c0d674722fc6837daae7c7c10258 Mon Sep 17 00:00:00 2001 From: Jannis Portmann Date: Sat, 10 Feb 2024 00:24:57 +0100 Subject: [PATCH 1/3] Handle nonexistent and multiple PLZ --- pflaenzli/pflaenzli/utils/distance.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pflaenzli/pflaenzli/utils/distance.py b/pflaenzli/pflaenzli/utils/distance.py index 9e83b74..2c03ea8 100644 --- a/pflaenzli/pflaenzli/utils/distance.py +++ b/pflaenzli/pflaenzli/utils/distance.py @@ -20,11 +20,18 @@ def filter_by_distance(qs, filter_zipcode, max_dist): try: filter_plz = Plz.objects.get(plz=filter_zipcode) + except Plz.MultipleObjectsReturned: + filter_plz = Plz.objects.filter(plz=filter_zipcode)[0] except Plz.DoesNotExist: return filtered_offers for offer in qs: - offer_plz = Plz.objects.get(plz=offer.zipcode) + try: + offer_plz = Plz.objects.get(plz=offer.zipcode) + except Plz.MultipleObjectsReturned: + offer_plz = Plz.objects.filter(plz=offer.zipcode)[0] + except Plz.DoesNotExist: + offer_plz = None d = calculate_distance(offer_plz, filter_plz) if d is not None and d <= max_dist: From 2f3de9b91ea81090c5092e9716407691349e84c5 Mon Sep 17 00:00:00 2001 From: Jannis Portmann Date: Sat, 10 Feb 2024 12:18:29 +0100 Subject: [PATCH 2/3] Implement new PLZ index --- README.md | 4 +- entrypoint.sh | 1 - .../management/commands/getplzindex.py | 78 ------------------- ...7_alter_plz_name_squashed_0008_load_plz.py | 24 ++++++ pflaenzli/pflaenzli/models.py | 2 +- pflaenzli/pflaenzli/utils/load_plz.py | 65 ++++++++++++++++ 6 files changed, 92 insertions(+), 82 deletions(-) delete mode 100644 pflaenzli/pflaenzli/management/commands/getplzindex.py create mode 100644 pflaenzli/pflaenzli/migrations/0007_alter_plz_name_squashed_0008_load_plz.py create mode 100644 pflaenzli/pflaenzli/utils/load_plz.py diff --git a/README.md b/README.md index 48f68d7..a5608a2 100644 --- a/README.md +++ b/README.md @@ -42,5 +42,5 @@ python manage.py getplzindex --force To get started with 
development, see [DEVELOPMENT.md](DEVELOPMENT.md) ## Open Source Data -For calculating distances between zip codes, the `PLZ_Verzeichnis` is used. -Source: https://opendata.swiss/de/dataset/plz_verzeichnis \ No newline at end of file +For calculating distances between zip codes, the `Official index of cities and towns including postal codes and perimeter` is used. +Source: https://opendata.swiss/en/dataset/amtliches-ortschaftenverzeichnis-mit-postleitzahl-und-perimeter \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh index 82e50ab..9b3f383 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -3,7 +3,6 @@ python manage.py migrate python manage.py loco python manage.py makemessages -l en -l de python manage.py compilemessages -f -python manage.py getplzindex nginx diff --git a/pflaenzli/pflaenzli/management/commands/getplzindex.py b/pflaenzli/pflaenzli/management/commands/getplzindex.py deleted file mode 100644 index b3f000e..0000000 --- a/pflaenzli/pflaenzli/management/commands/getplzindex.py +++ /dev/null @@ -1,78 +0,0 @@ -import json -import os -from urllib import request - -import pandas as pd -from django.core.management.base import BaseCommand -from pflaenzli_django.settings import BASE_DIR -from pflaenzli.models import Plz - - -class Command(BaseCommand): - help = 'Get the zip code index from post and compile it to a dataframe pickle' - - def add_arguments(self, parser): - parser.add_argument("--force", action="store_true", required=False) - - def handle(self, *args, **options): - self.parse_data(*self.download_geojson(api='v2', data='v2', force=options["force"]), force=options["force"]) - - def download_geojson(self, api, data, force=False): - file = f'plz_verzeichnis_{data}.json' - if os.path.exists(file) and not force: - self.stdout.write('File already downloaded.') - self.stdout.write(self.style.SUCCESS( - 'Skipping...\n')) - exists = True - else: - self.stdout.write('Downloading geojson...') - url = 
f'https://swisspost.opendatasoft.com/api/{api}/catalog/datasets/plz_verzeichnis_{data}/exports/geojson' - request.urlretrieve(url, file) - self.stdout.write(self.style.SUCCESS('Done!\n')) - exists = False - - return file, exists - - def parse_data(self, file, exists, force=False): - if exists and not force: - self.stdout.write(self.style.WARNING( - 'Nothing was done, if you want to redownload the PLZ index, use the --force option.\n')) - return - - self.stdout.write('Opening file...') - # Load the GeoJSON data for the zip codes - with open(file, encoding='UTF-8') as f: - full_data = json.load(f) - self.stdout.write(self.style.SUCCESS('Done!\n')) - - self.stdout.write('Deleting existing data...') - Plz.objects.all().delete() - self.stdout.write(self.style.SUCCESS('Done!\n')) - - self.stdout.write('Parsing file and add new data...') - - for plz_entry in full_data['features']: - plz_entry = plz_entry['properties'] - try: - plz = plz_entry['postleitzahl'] - except (AttributeError, TypeError, ValueError): - continue - - try: - lat = plz_entry['geo_point_2d']['lat'] - lon = plz_entry['geo_point_2d']['lon'] - except (AttributeError, TypeError): - continue - - if plz is None or lat is None or lon is None: - continue - - try: - name = plz_entry['ortbez27'] - except (KeyError, AttributeError, TypeError): - name = None - - plz, _ = Plz.objects.get_or_create(plz=int(plz), lat=lat, lon=lon, name=name) - - self.stdout.write(self.style.SUCCESS('Wrote PLZ data to the databse successfully\n')) - self.stdout.write(self.style.SUCCESS('Done!')) diff --git a/pflaenzli/pflaenzli/migrations/0007_alter_plz_name_squashed_0008_load_plz.py b/pflaenzli/pflaenzli/migrations/0007_alter_plz_name_squashed_0008_load_plz.py new file mode 100644 index 0000000..dbf4af7 --- /dev/null +++ b/pflaenzli/pflaenzli/migrations/0007_alter_plz_name_squashed_0008_load_plz.py @@ -0,0 +1,24 @@ +# Generated by Django 4.2.10 on 2024-02-10 11:16 + +from django.db import migrations, models +from 
pflaenzli.utils.load_plz import load_plz + + +class Migration(migrations.Migration): + + replaces = [("pflaenzli", "0007_alter_plz_name"), ("pflaenzli", "0008_load_plz")] + + dependencies = [ + ("pflaenzli", "0006_plz"), + ] + + operations = [ + migrations.AlterField( + model_name="plz", + name="name", + field=models.CharField(max_length=40), + ), + migrations.RunPython( + code=load_plz, + ), + ] diff --git a/pflaenzli/pflaenzli/models.py b/pflaenzli/pflaenzli/models.py index aa50a60..7dae91c 100644 --- a/pflaenzli/pflaenzli/models.py +++ b/pflaenzli/pflaenzli/models.py @@ -18,7 +18,7 @@ class Plz(models.Model): plz = models.IntegerField(verbose_name='PLZ') lat = models.DecimalField(max_digits=8, decimal_places=6) lon = models.DecimalField(max_digits=8, decimal_places=6) - name = models.CharField(max_length=27) + name = models.CharField(max_length=40) class Offer(models.Model): diff --git a/pflaenzli/pflaenzli/utils/load_plz.py b/pflaenzli/pflaenzli/utils/load_plz.py new file mode 100644 index 0000000..76d5026 --- /dev/null +++ b/pflaenzli/pflaenzli/utils/load_plz.py @@ -0,0 +1,65 @@ +import os +from urllib import request +import hashlib +import shutil +import pandas as pd + +from pflaenzli.models import Plz + + + +FILENAME = "AMTOVZ_CSV_WGS84" +FILEPATH = os.path.join(FILENAME, f"{FILENAME}.csv") + + +def add_arguments(parser): + parser.add_argument("--force", action="store_true", required=False) + +def load_plz(apps, schema_editor, force = True): + get_index() + if force or not hash_matches(): + create_objects() + + +def hash_file(filename): + h = hashlib.sha1() + + with open(filename,'rb') as file: + chunk = 0 + while chunk != b'': + # read only 1024 bytes at a time + chunk = file.read(1024) + h.update(chunk) + + return h.hexdigest() + + +def get_index(url="https://data.geo.admin.ch/ch.swisstopo-vd.ortschaftenverzeichnis_plz/ortschaftenverzeichnis_plz/ortschaftenverzeichnis_plz_4326.csv.zip"): + request.urlretrieve(url, 'index.zip') + 
shutil.unpack_archive('index.zip') + + +def hash_matches(file=FILEPATH, hashfile="hash"): + if os.path.exists(hashfile): + with open(hashfile, 'r+') as f: + old_hash = f.read() + new_hash = hash_file(file) + + if old_hash == new_hash: + return True + + f.truncate(0) + f.write(new_hash) + else: + with open(hashfile, 'w') as f: + f.write(hash_file(file)) + + return False + + +def create_objects(file=FILEPATH): + df = pd.read_csv(file, sep=";") + objects = [Plz(plz=row['PLZ'], lat=row['N'], lon=row['E'], name=row['Ortschaftsname']) for index, row in df.iterrows()] + + Plz.objects.all().delete() + Plz.objects.bulk_create(objects) From bd70276bc2df21de31de2ad18d98e7515e0738aa Mon Sep 17 00:00:00 2001 From: Jannis Portmann Date: Sat, 10 Feb 2024 12:23:03 +0100 Subject: [PATCH 3/3] Update requirements --- requirements.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8d13275..7c76a07 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ -crispy-bootstrap5==0.7 -Django==4.1.11 +crispy-bootstrap5==2023.10 +Django==4.2.10 djangoloco==1.0 -django-bootstrap5==23.3 -django-crispy-forms==2.0 +django-bootstrap5==23.4 +django-crispy-forms==2.1 django-jquery==3.1.0 django-friendly-captcha==0.1.8 -geopy==2.4.0 +geopy==2.4.1 gunicorn==21.2.0 -fontawesomefree==6.4.2 -pandas==2.1.0 -Pillow==10.0.1 -psycopg2-binary==2.9.7 -python-dotenv==1.0.0 +fontawesomefree==6.5.1 +pandas==2.2.0 +Pillow==10.2.0 +psycopg2-binary==2.9.9 +python-dotenv==1.0.1