views.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. from django.shortcuts import render
  2. from django.http import HttpResponse,FileResponse
  3. from .forms import *
  4. from django.views.decorators.csrf import csrf_exempt
  5. import os
  6. from bpconverter.settings import MEDIA_ROOT
  7. import PyPDF2
  8. import re
  9. import glob
  10. import mimetypes
  11. def handle_uploaded_file(f):
  12. print(dir(f.chunks))
  13. counter = 0
  14. with open(os.path.join(MEDIA_ROOT,f.name), 'wb+') as destination:
  15. counter = counter +1
  16. print(counter)
  17. for chunk in f.chunks():
  18. #print(chunk) ok. qualcosa carica
  19. destination.write(chunk)
  20. print("{}/out/*.pdf".format(MEDIA_ROOT))
  21. listtoremove = glob.glob("{}/out/*.pdf".format(MEDIA_ROOT))
  22. for ltr in listtoremove:
  23. print('removing:',ltr)
  24. os.remove(ltr)
  25. return os.path.join(MEDIA_ROOT,f.name)
  26. def esegue_conversione(inputfile):
  27. rec = re.compile('[A-Z]{6}\d{2}[A-Z]\d{2}[A-Z]\d{3}[A-Z]',flags=re.I)
  28. print('apertura file')
  29. pdfr=None
  30. numpdfr=0
  31. try:
  32. pdfr = PyPDF2.PdfReader(inputfile)
  33. except PyPDF2.errors.PdfReadError as pre:
  34. print('errore',pre)
  35. risultato = dict()
  36. if pdfr:
  37. print('numero di pagine',len(pdfr.pages))
  38. numpdfr=len(pdfr.pages)
  39. codicefiscale=None
  40. listapagine={} #contiene il codice fiscale e le pagine in cui e' stato trovato
  41. for i in range(0,len(pdfr.pages)):
  42. pagina = pdfr.pages[i]
  43. testo = pagina.extract_text(0)
  44. # il testo estratto e' tutto carattere.
  45. trovato=re.search(rec,testo)
  46. if trovato:
  47. print('trovato',trovato[0])
  48. if trovato[0] in listapagine:
  49. listapagine[trovato[0]].append(i)
  50. else:
  51. listapagine[trovato[0]] = list()
  52. listapagine[trovato[0]].append(i)
  53. risultato[trovato[0]] = ''
  54. for k in listapagine:
  55. # per ogni codice fiscale, genera le pagine che corrispondono (listapagine[k])
  56. print(k,listapagine[k])
  57. writer = PyPDF2.PdfWriter()
  58. for kk in listapagine[k]:
  59. print('pagine da salvare',kk)
  60. writer.add_page(pdfr.pages[kk])
  61. r = open("{}/out/{}.pdf".format(MEDIA_ROOT,k),'wb')
  62. writer.write(r)
  63. r.close()
  64. risultato[k] = {'file':"{}/out/{}.pdf".format(MEDIA_ROOT,k),'pagine':listapagine[k]}
  65. '''
  66. fino a qui e' tutto chiaro.
  67. il file elaborato viene salvato in out denetro la cartella MEDIA_ROOT
  68. '''
  69. else: risultato['errore']=[]
  70. #rimozione del file
  71. os.remove(inputfile)
  72. print('risultato',risultato)
  73. return (risultato,numpdfr)
  74. @csrf_exempt
  75. def frontpage(request):
  76. print(__name__)
  77. listaccepted = list()
  78. listaccepted.append('127.0.0.1')
  79. listaccepted.append('195.110.154.210')
  80. listaccepted.append('195.110.154.213')
  81. if 'REMOTE' in os.environ:
  82. remote = os.environ['REMOTE'].split(',')
  83. for r in remote:
  84. listaccepted.append(r)
  85. print(request.META)
  86. if 'x_real_ip'.upper() in request.META:
  87. print('x_real_ip',request.META['HTTP_X_REAL_IP'])
  88. if 'http_host'.upper() in request.META:
  89. print('http_host',request.META['HTTP_HOST'])
  90. if 'remote_address'.upper() in request.META:
  91. print('remote_addr',request.META['REMOTE_ADDR'])
  92. if 'http_x_forwarded_for'.upper() in request.META:
  93. print('http_x_forwarded_for',request.META['HTTP_X_FORWARDED_FOR'])
  94. auth = False
  95. if 'HTTP_X_REAL_IP' in request.META:
  96. print('presente X_REAL_IP',request.META['HTTP_X_REAL_IP'])
  97. if request.META['HTTP_X_REAL_IP'] in listaccepted:
  98. print('found presente X_REAL_IP',request.META['HTTP_X_REAL_IP'])
  99. auth = True
  100. if 'HTTP_HOST' in request.META:
  101. if request.META['HTTP_HOST'] in listaccepted:
  102. print('found presente HTTP_HOST',request.META['HTTP_HOST'])
  103. auth = True
  104. if 'REMOTE_ADDR' in request.META:
  105. if request.META['REMOTE_ADDR'] in listaccepted:
  106. print('found presente REMOTE_ADDR',request.META['REMOTE_ADDR'])
  107. auth = True
  108. if 'HTTP_X_FORWARDED_FOR' in request.META:
  109. if request.META['HTTP_X_FORWARDED_FOR'] in listaccepted:
  110. print('found presente HTTP_X_FORWARDED_FOR',request.META['HTTP_X_FORWARDED_FOR'])
  111. auth = True
  112. if not auth:
  113. return HttpResponse("Accesso Non Autorizzato", status=503, headers={})
  114. data = {}
  115. print('request.method',request.method)
  116. if request.method == 'POST':
  117. form = UploadFileForm(request.POST,request.FILES)
  118. print('form is valid',form.is_valid())
  119. if form.is_valid():
  120. nomefile = request.FILES['filecaricato']
  121. print('nome file',nomefile)
  122. filesalvato = handle_uploaded_file(nomefile)
  123. risultato = esegue_conversione(filesalvato)
  124. data['listafiles'] = risultato[0]
  125. data['numeropagine'] = risultato[1]
  126. print('dati passati al template',data)
  127. return render(request,'frontpage.html',data)
  128. def finalize(request,cf=None):
  129. if not cf:
  130. return render("download.error.html")
  131. fl_completa = os.path.join(MEDIA_ROOT,'out',"{}.pdf".format(cf))
  132. print('path completa',fl_completa)
  133. if os.path.isfile(fl_completa):
  134. try:
  135. fl = open(fl_completa, 'rb')
  136. except Exception as er:
  137. print('errore',er)
  138. data=dict()
  139. data['errore']="File non esistente o non ancora disponibile"
  140. return render(request,'documento.error.html',data)
  141. else:
  142. print('il file non esiste in document,download')
  143. data=dict()
  144. data['errore']='File non esistente o non ancora disponibile'
  145. return render(request,'documento.error.html',data)
  146. mime_type, _ = mimetypes.guess_type(fl_completa)
  147. fl = fl_completa
  148. print('fl',fl,mime_type) # deve essere un file pdf
  149. #response = HttpResponse(open(fl,'rb'))
  150. response = FileResponse(open(fl,'rb'), content_type='application/pdf')
  151. response['Content-Disposition'] = "inline; filename=%s" % "{}.pdf".format(cf)
  152. return response