Skip to content

Commit

Permalink
use calamine to ingest files not opened by openpyxl
Browse files Browse the repository at this point in the history
  • Loading branch information
Guy-Galil committed Aug 15, 2024
1 parent dbe32df commit 4e62621
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 3 deletions.
2 changes: 1 addition & 1 deletion djang/importer/field_mapping.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[{
"tab_name": "סכום נכסי הקרן",
"tab_name": ["סכום נכסי הקרן","סכום נכסים"],
"fields": [{
"class_name": "importer.models.Kupot",
"field_name": "ID",
Expand Down
9 changes: 7 additions & 2 deletions djang/importer/management/commands/import_single_file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.core.management.base import BaseCommand, CommandError
from importer import models# TODO remove
from importer.services import xsls_ingester
from importer.services import ingester_with_calamine
import io
import os

Expand All @@ -17,14 +18,18 @@ def add_arguments(self, parser):
parser.add_argument("file", type=str)

def handle(self, *args, **options):
filename="/home/guyga/hasadna/penssion/xlsx-files/files/רשימת נכסים ברמת נכס בודד- Public - מסלול פנסיה-2020 רבעון 1-מגדל מקפת אישית למקבלי קצבה קיימים.xlsx"#options["file"]
filename="/home/guyga/hasadna/penssion/xlsx-files/files/512065202_p12157_p120.xlsx"#options["file"]
m=options["mode"]
ingester = xsls_ingester.xls_ingester()
with io.open(filename, "rb") as file:
xls = io.BufferedReader(file)
ingester = xsls_ingester.xls_ingester()
self.stdout.write("ingest %s" %filename )
ingester.ingest(filename, xls)
status=ingester.ingest(filename, xls)
if not status:
ingester = ingester_with_calamine.Ingester_whith_calamine()
self.stdout.write("ingest %s" %filename )
status=ingester.ingest(filename, xls)
self.stdout.write(
self.style.SUCCESS('Successfully imported "%s"' %options )
)
Expand Down
83 changes: 83 additions & 0 deletions djang/importer/services/ingester_with_calamine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@

from python_calamine import CalamineWorkbook
from importer.services import xsls_ingester
import traceback

class Ingester_whith_calamine(xsls_ingester.xls_ingester):
    """Ingester that reads workbooks through python-calamine.

    Overrides the workbook-access methods of ``xls_ingester`` (sheet listing,
    sheet lookup, first-tab parsing and the ``ingest`` entry point) so they
    operate on a ``CalamineWorkbook``.  ``parse_spreadsheet``,
    ``put_header_fields``, ``put_in_model``, ``save_first_tab`` and
    ``reference_objects`` are not defined here and are expected to come from
    the base class.
    """

    def __init__(self):
        super().__init__()

    def get_sheetnames(self, wb):
        """Return the sheet names of the calamine workbook ``wb``."""
        return wb.sheet_names

    def getSheet(self, wb, sn):
        """Return the first sheet of ``wb`` whose name is listed in ``sn``.

        ``sn`` is an iterable of candidate sheet names; a single string is
        accepted as well and treated as one candidate.  Returns ``None`` when
        none of the candidates exists in the workbook.
        """
        candidates = [sn] if isinstance(sn, str) else sn
        # python-calamine is expected to raise for an unknown sheet name
        # rather than return None (confirm for the pinned version), so test
        # membership first; otherwise a missing first candidate would abort
        # the lookup before the alternatives are tried.
        available = set(wb.sheet_names)
        for name in candidates:
            if name not in available:
                continue
            ws = wb.get_sheet_by_name(name)
            if ws is not None:
                return ws
        return None

    def parse_first_tab(self, wb, sn, tab):
        """Read the report header fields from the first tab and save them.

        Looks at the first three rows of the sheet named by ``sn``.  Every
        non-empty cell is stripped of ``*`` and ``:`` and compared with the
        ``column_title`` of each entry in ``tab["fields"]``; on a match the
        value of the cell immediately to the right is stored through
        ``put_in_model``.  ``save_first_tab`` is always called at the end.
        """
        # from first tab get report date, company, track name and track code
        # optionally get report summary as well
        self.put_header_fields(self.reference_objects)
        fields = tab["fields"]
        # NOTE(review): the sheet is scanned once, and only if the tab has at
        # least one non-generated field -- confirm this matches the intent of
        # the base-class implementation.
        if any(field["type"] != 'generated' for field in fields):
            worksheet = self.getSheet(wb, sn)
            if worksheet is not None:
                workarray = worksheet.to_python(skip_empty_area=False)
                for row in workarray[0:3]:
                    self._parse_header_row(row, fields)
        self.save_first_tab()

    def _parse_header_row(self, row, fields):
        """Match the cells of one header row against the field titles."""
        for i, cell in enumerate(row):
            if cell is None or cell == '':
                continue
            text = str(cell)
            if text.startswith("{PL}PickLst"):
                # pick-list marker: ignore the remainder of this row
                break
            # in some of the reports there are multiple * characters on
            # column titles as pointers to comments
            stripped = text.replace('*', '').replace(":", "").strip()
            for field1 in fields:
                if stripped in field1["column_title"]:
                    # The value sits in the next cell; a title in the last
                    # column has no value cell, so guard the index.
                    val = row[i + 1] if i + 1 < len(row) else None
                    if val is not None:
                        self.reference_objects = self.put_in_model(
                            self.reference_objects, field1, val)
                    break
                elif field1["type"] == "reference":
                    self.reference_objects = self.put_in_model(
                        self.reference_objects, field1, None)

    def ingest(self, filename, file_stream):
        """Open ``filename`` with calamine and parse it.

        ``file_stream`` is accepted for signature compatibility with the
        base ingester but is not used; the workbook is opened by path.

        Returns ``True`` on success and ``False`` on any failure.  A
        ``ValueError`` is reported as "report already exists"; any other
        exception is printed and recorded in ``FilesNotIngested``.
        """
        try:
            self.file = filename
            wb = CalamineWorkbook.from_path(filename)
            self.parse_spreadsheet(wb)
            return True
        except ValueError:
            print("report already exists")
            return False
        except Exception as e:
            # log failed files
            traceback.print_exc()
            # Imported here because this module only imports
            # ``importer.services.xsls_ingester``, which does not bind the
            # name ``importer``; referring to ``importer.models`` directly
            # raised NameError inside this handler.
            from importer import models
            fni = models.FilesNotIngested()
            fni.file_name = filename
            fni.info = "Failed to read workbook\n\r"+str(e)
            fni.save()
            return False

0 comments on commit 4e62621

Please sign in to comment.