Add command to get the plz index

This commit is contained in:
Jannis Portmann 2023-05-17 16:34:45 +02:00
parent 7749d98e87
commit 4be2ee5e22

View file

@ -0,0 +1,68 @@
import json
import os
from urllib import request
import geojson
import pandas as pd
from geopy.distance import distance
from django.core.management.base import BaseCommand
from pflaenzli_django.settings import BASE_DIR
class Command(BaseCommand):
    """Management command that builds the zip code (PLZ) coordinate index.

    Fetches the ``plz_verzeichnis`` GeoJSON export from
    swisspost.opendatasoft.com (unless a local copy already exists), extracts
    one ``[lat, lon]`` pair per zip code and stores the result as a pickled
    pandas DataFrame under ``<BASE_DIR>/pflaenzli/utils/plz.pkl``.
    """

    help = 'Get the zip code index from post and compile it to a dataframe pickle'

    def handle(self, *args, **options):
        """Download the dataset (if needed), then parse and pickle it."""
        self.parse_data(self.download_geojson(api='v2', data='v2'))
        self.stdout.write(self.style.SUCCESS('Done!'))

    def download_geojson(self, api, data):
        """Make sure the GeoJSON export exists locally and return its path.

        Args:
            api: API version segment inserted into the download URL.
            data: Dataset version suffix; used both in the URL and in the
                local file name ``plz_verzeichnis_<data>.json``.

        Returns:
            The (relative) path of the local GeoJSON file.

        Raises:
            Whatever ``urllib.request.urlretrieve`` or ``os.replace`` raise
            when the download or the final rename fails.
        """
        file = f'plz_verzeichnis_{data}.json'
        if os.path.exists(file):
            self.stdout.write('File already downloaded. Skipping...\n')
            return file

        self.stdout.write('Downloading geojson...')
        url = f'https://swisspost.opendatasoft.com/api/{api}/catalog/datasets/plz_verzeichnis_{data}/exports/geojson'
        # Download to a temporary name first: an interrupted transfer must not
        # leave a truncated file behind that the next run would accept as
        # "already downloaded".
        partial = f'{file}.part'
        try:
            request.urlretrieve(url, partial)
            os.replace(partial, file)
        finally:
            # After a successful os.replace the partial file is gone, so this
            # only cleans up after a failed or aborted download.
            if os.path.exists(partial):
                os.remove(partial)
        self.stdout.write(self.style.SUCCESS('Done!\n'))
        return file

    @staticmethod
    def _extract_entry(feature):
        """Return ``(plz, lat, lon)`` for one GeoJSON feature, or ``None``.

        ``None`` is returned when the feature lacks any of the required
        fields, carries a null value for one of them, or has a zip code that
        cannot be converted to ``int``.
        """
        try:
            properties = feature['properties']
            plz = properties['postleitzahl']
            point = properties['geo_point_2d']
            lat = point['lat']
            lon = point['lon']
        except (KeyError, TypeError):
            # KeyError: field missing; TypeError: a parent value is null or
            # not a mapping.
            return None
        if plz is None or lat is None or lon is None:
            return None
        try:
            return int(plz), lat, lon
        except (TypeError, ValueError):
            return None

    def parse_data(self, file):
        """Parse the GeoJSON file and write the zip code index pickle.

        Args:
            file: Path of the GeoJSON file to read (UTF-8 encoded JSON with a
                top-level ``features`` list).

        Raises:
            ValueError: If the file contains no usable zip code entry; the
                existing pickle is left untouched in that case.
        """
        self.stdout.write('Opening file...')
        # Load the GeoJSON data for the zip codes
        with open(file, encoding='UTF-8') as f:
            full_data = json.load(f)
        self.stdout.write(self.style.SUCCESS('Done!\n'))

        self.stdout.write('Parsing file...')
        zip_dict = {}
        for feature in full_data['features']:
            entry = self._extract_entry(feature)
            if entry is None:
                continue
            plz, lat, lon = entry
            # A zip code that occurs several times keeps its last coordinates.
            zip_dict[plz] = [lat, lon]
        self.stdout.write(self.style.SUCCESS('Done!\n'))

        if not zip_dict:
            raise ValueError(f'No usable zip code entries found in {file}')

        df = pd.DataFrame.from_dict(zip_dict, orient='index', columns=['lat', 'lon'])

        self.stdout.write('Saving pickle...')
        destination = os.path.join(BASE_DIR, 'pflaenzli', 'utils', 'plz.pkl')
        df.to_pickle(destination)
        self.stdout.write(self.style.SUCCESS(f'Wrote pickle to {destination}\n'))