"""Django management command: build a postal-code -> coordinates lookup table.

File path (from the originating diff):
pflaenzli/pflaenzli/management/commands/getplzindex.py

The command downloads the ``plz_verzeichnis`` GeoJSON export from the
Swiss Post opendatasoft API, extracts one ``[lat, lon]`` pair per postal
code and stores the result as a pickled pandas DataFrame.
"""
import json
import os
from urllib import request

# NOTE(review): `geojson` and `distance` are not referenced anywhere in this
# module; they are kept so that this edit does not remove existing imports.
import geojson
import pandas as pd
from geopy.distance import distance

from django.core.management.base import BaseCommand

from pflaenzli_django.settings import BASE_DIR


class Command(BaseCommand):
    """Download the postal-code GeoJSON and compile it to a DataFrame pickle."""

    help = 'Get the zip code index from post and compile it to a dataframe pickle'

    def handle(self, *args, **options):
        """Entry point: download (if needed), parse, and write the pickle."""
        geojson_path = self.download_geojson(api='v2', data='v2')
        self.parse_data(geojson_path)
        self.stdout.write(self.style.SUCCESS('Done!'))

    def download_geojson(self, api, data):
        """Fetch the GeoJSON export unless it is already present on disk.

        Args:
            api: API version segment inserted into the download URL.
            data: Dataset version suffix; used in both the URL and the
                local file name.

        Returns:
            The local file name (relative to the current working directory)
            of the GeoJSON file.

        Raises:
            Whatever ``urllib.request.urlretrieve`` raises on network errors.
        """
        file = f'plz_verzeichnis_{data}.json'
        if os.path.exists(file):
            self.stdout.write('File already downloaded. Skipping...\n')
            return file

        self.stdout.write('Downloading geojson...')
        url = f'https://swisspost.opendatasoft.com/api/{api}/catalog/datasets/plz_verzeichnis_{data}/exports/geojson'

        # Download to a temporary name first and move it into place only on
        # success. Otherwise an interrupted download would leave a truncated
        # file behind that the existence check above would accept next time.
        partial = f'{file}.part'
        try:
            request.urlretrieve(url, partial)
            os.replace(partial, file)
        finally:
            # Only still present if the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)
        self.stdout.write(self.style.SUCCESS('Done!\n'))

        return file

    @staticmethod
    def _build_zip_index(features):
        """Map each integer postal code to its ``[lat, lon]`` pair.

        Features that lack any of the required fields, carry ``None`` in
        them, or have a postal code that cannot be converted to ``int`` are
        skipped. If a postal code occurs more than once, the last occurrence
        wins.
        """
        zip_dict = {}
        for feature in features:
            try:
                properties = feature['properties']
                plz = properties['postleitzahl']
                point = properties['geo_point_2d']
                lat = point['lat']
                lon = point['lon']
            except (KeyError, TypeError, AttributeError):
                # KeyError: field missing; TypeError: a container is None or
                # otherwise not subscriptable by string.
                continue

            if plz is None or lat is None or lon is None:
                continue

            try:
                key = int(plz)
            except (TypeError, ValueError):
                continue

            zip_dict[key] = [lat, lon]
        return zip_dict

    def parse_data(self, file):
        """Parse the GeoJSON file and write the lookup table as a pickle.

        Args:
            file: Path of the GeoJSON file to read (UTF-8 encoded).

        The resulting DataFrame is indexed by postal code with the columns
        ``lat`` and ``lon`` and is written to
        ``<BASE_DIR>/pflaenzli/utils/plz.pkl``.
        """
        self.stdout.write('Opening file...')
        # Load the GeoJSON data for the zip codes
        with open(file, encoding='UTF-8') as f:
            full_data = json.load(f)
        self.stdout.write(self.style.SUCCESS('Done!\n'))

        self.stdout.write('Parsing file...')
        zip_dict = self._build_zip_index(full_data['features'])
        self.stdout.write(self.style.SUCCESS('Done!\n'))

        # Passing `columns` here (instead of assigning df.columns afterwards)
        # also works when no valid entry was found and the frame is empty.
        df = pd.DataFrame.from_dict(zip_dict, orient='index', columns=['lat', 'lon'])

        self.stdout.write('Saving pickle...')
        destination = os.path.join(BASE_DIR, 'pflaenzli', 'utils', 'plz.pkl')
        df.to_pickle(destination)

        self.stdout.write(self.style.SUCCESS(f'Wrote pickle to {destination}\n'))