Skip to content
 
Add note

tttt


file here


      

# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function

from collections import Counter
import pandas as pd
from django.contrib.auth.decorators import user_passes_test
from django.shortcuts import redirect, render
from django.urls import reverse

from dataops import ops, pandas_db
from ontask.permissions import is_instructor
from workflow.ops import get_workflow
from .forms import UploadCSVFileForm
from unidecode import unidecode


def remove_non_ascii(text):
    """
    Transliterate a string to an ASCII-only approximation using unidecode.

    This is a plain helper (not a view), so it must not carry any view
    decorator.

    :param text: Text to transliterate. Byte strings are decoded as UTF-8
    first; text strings are passed to unidecode unchanged.
    :return: The value returned by unidecode for the given text
    """
    # Decode only when needed: decoding an already-decoded text string
    # raises TypeError, and column names may arrive in either form.
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    return unidecode(text)


def _render_upload_form(request, form, workflow):
    """
    Render the first upload step page with the context shared by every
    code path that (re)displays the form.

    :param request: Web request
    :param form: Form to show (possibly carrying error messages)
    :param workflow: Workflow being manipulated (its id is given to the
    template as 'wid')
    :return: HTTP response with the rendered upload page
    """
    return render(request, 'dataops/upload1.html',
                  {'form': form,
                   'wid': workflow.id,
                   'dtype': 'CSV',
                   'dtype_select': 'CSV file',
                   'prev_step': reverse('dataops:list')})


@user_passes_test(is_instructor)
def csvupload1(request):
    """
    The four step process will populate the following dictionary with name
    upload_data (divided by steps in which they are set)

    STEP 1:

    initial_column_names: List of column names in the initial file.

    column_types: List of column types as detected by pandas

    src_is_key_column: Boolean list with src columns that are unique

    step_1: URL name of the first step

    :param request: Web request
    :return: Creates the upload_data dictionary in the session and redirects
    to the second step. If there is no workflow it redirects to the workflow
    index, and if the file cannot be processed the upload page is rendered
    again with the error attached to the form.
    """

    # Get the current workflow
    workflow = get_workflow(request)
    if not workflow:
        return redirect('workflow:index')

    # Bind the form with the received data
    form = UploadCSVFileForm(request.POST or None, request.FILES or None)

    # Process the initial loading of the form
    if request.method != 'POST':
        return _render_upload_form(request, form, workflow)

    # Process the reception of the file
    if not form.is_multipart():
        msg = "CSV upload form is not multiform"
        context = {'message': msg}

        meta = request.META.get('HTTP_REFERER', None)
        if meta:
            context['meta'] = meta
        return render(request, 'critical_error.html', context=context)

    # If not valid, this is probably because the file submitted was too big
    if not form.is_valid():
        return _render_upload_form(request, form, workflow)

    # Process CSV file using pandas read_csv
    try:
        data_frame = pd.read_csv(
            request.FILES['file'],
            index_col=False,
            infer_datetime_format=True,
            quotechar='"',
            skiprows=form.cleaned_data['skip_lines_at_top'],
            skipfooter=form.cleaned_data['skip_lines_at_bottom']
        )

        # Column names are stripped and transliterated to ASCII. Columns
        # holding strings are tentatively converted to datetime.
        cols = {}
        for x in list(data_frame.columns):
            cols[x] = remove_non_ascii(x.strip())

            if data_frame[x].dtype.name == 'object':
                try:
                    data_frame[x] = pd.to_datetime(
                        data_frame[x],
                        infer_datetime_format=True)
                except ValueError:
                    # Not a date column: deliberately keep it untouched
                    pass
        data_frame.rename(columns=cols, inplace=True)
    except Exception as e:
        # Boundary of the view: any failure while parsing is reported to the
        # user through the form. Format the exception itself because not
        # every exception type has a "message" attribute.
        form.add_error('file',
                       'File could not be processed ({0})'.format(e))
        return _render_upload_form(request, form, workflow)

    # If the frame has repeated column names, it will not be processed.
    # Counter must be traversed with items() to obtain (name, count) pairs.
    dup = [name for name, count in Counter(data_frame.columns).items()
           if count > 1]
    if dup:
        form.add_error(
            'file',
            'The file has duplicated column names (' +
            ','.join(dup) + ').')
        return _render_upload_form(request, form, workflow)

    # If the data frame does not have any unique key, it is not useful (no
    # way to uniquely identify rows). There must be at least one.
    src_is_key_column = ops.are_unique_columns(data_frame)
    if not any(src_is_key_column):
        form.add_error(
            'file',
            'The data has no column with unique values per row. '
            'At least one column must have unique values.')
        return _render_upload_form(request, form, workflow)

    # Store the data frame in the DB.
    try:
        # Get frame info with three lists: names, types and is_key
        frame_info = ops.store_upload_dataframe_in_db(data_frame, workflow.id)
    except Exception:
        form.add_error(
            'file',
            'Sorry. This file cannot be processed.'
        )
        return _render_upload_form(request, form, workflow)

    # Dictionary to populate gradually throughout the sequence of steps. It
    # is stored in the session.
    request.session['upload_data'] = {
        'initial_column_names': frame_info[0],
        'column_types': frame_info[1],
        'src_is_key_column': frame_info[2],
        'step_1': 'dataops:csvupload1'
    }

    return redirect('dataops:upload_s2')

Edit page