netZooPy icon indicating copy to clipboard operation
netZooPy copied to clipboard

LionessPuma issue: compute_puma() missing 1 required positional argument: 'sorted_index'

Open · talkhanz opened this issue 3 years ago • 1 comment

import os
import sys
from io import StringIO

import numpy as np
import pandas as pd
from netZooPy.lioness.lioness import Lioness
from netZooPy.lioness.lioness_for_puma import LionessPuma
from netZooPy.panda.panda import Panda
from netZooPy.puma.puma import Puma

class Capturing(list):
    """List that collects the lines printed to ``sys.stdout`` inside a ``with`` block.

    On entry ``sys.stdout`` is swapped for an in-memory ``StringIO``; on exit
    the buffered text is split into lines and appended to this list, and the
    previous ``sys.stdout`` is put back.
    """

    def __enter__(self):
        # Remember the stream that was active so __exit__ can reinstate it.
        self._stdout = sys.stdout
        self._stringio = StringIO()
        sys.stdout = self._stringio
        return self

    def __exit__(self, *args):
        captured_text = self._stringio.getvalue()
        self.extend(captured_text.splitlines())
        # Drop the buffer reference so the captured text is not kept twice.
        del self._stringio
        sys.stdout = self._stdout
def _parse_labels(labels, cast):
    """Return ``[cast(x) for x in labels]``, or ``None`` if any label cannot be converted."""
    try:
        return [cast(label) for label in labels]
    except (TypeError, ValueError):
        return None


def read_expression_file(filepath='', header='infer', server='PANDA', sample_mode=''):
    """Load a comma-separated expression table, guessing whether row one is a header.

    The file is first read with its first row as column labels and its first
    column as the index. If those labels look like data (they all parse as
    numbers) the file is read again with ``header=None`` so the first row is
    kept as a data row.

    Args:
        filepath: Path of the CSV file to read.
        header: Accepted for API compatibility; the first read always uses
            ``header='infer'`` regardless of this value.
        server: ``'LIONESS'`` enables the ``sample_mode`` handling below; any
            other value goes straight to the generic numeric-header check.
        sample_mode: Only used when ``server == 'LIONESS'``.
            ``'sample_name'`` returns the table as first read.
            ``'sample_num'`` keeps the header when the labels are not integers
            or are exactly ``0..n-1``; otherwise the file is re-read without a
            header. Any other value falls through to the generic check.

    Returns:
        pandas.DataFrame: The parsed table.
    """
    df = pd.read_csv(filepath, delimiter=',', index_col=0, header='infer')

    if server == 'LIONESS':
        if sample_mode == 'sample_name':
            return df
        if sample_mode == 'sample_num':
            header_int = _parse_labels(df.columns, int)
            if header_int is None:
                # Non-integer labels: treat the first row as a genuine header.
                return df
            sample_list = list(range(len(df.columns)))
            if sample_list == header_int or sample_list == list(df.columns):
                return df
            # Integer labels that are not 0..n-1 are taken to be data values.
            # The re-read is deliberately outside any try block so that a
            # parsing failure is reported instead of being silently ignored.
            return pd.read_csv(filepath, index_col=0, sep=',', header=None)

    # Generic rule: an all-numeric "header" row is really the first data row.
    if _parse_labels(df.columns, float) is not None:
        df = pd.read_csv(filepath, index_col=0, sep=',', header=None)
    return df
def create_panda_obj(dataframes=None, filepaths=None, alpha=0.1, mode='union', precision="double", remove_missing=False, keep_expression_matrix=True):
    """Build a ``Panda`` object from in-memory tables or from file paths.

    ``dataframes`` takes priority: if it has any keys, its ``'expression'``,
    ``'motif'`` and ``'ppi'`` entries are passed to ``Panda``. Otherwise
    ``filepaths`` is used, but only when it holds more than three entries, or
    exactly three entries none of which equals ``""``.

    Args:
        dataframes: Mapping with ``'expression'``, ``'motif'`` and ``'ppi'``
            entries. ``None`` is treated as an empty mapping.
        filepaths: Mapping with the same three keys holding file paths.
            ``None`` is treated as an empty mapping.
        alpha: Forwarded to ``Panda`` as ``alpha``.
        mode: Forwarded to ``Panda`` as ``modeProcess``.
        precision: Forwarded to ``Panda`` as ``precision``.
        remove_missing: Forwarded to ``Panda``. The default matches the value
            this function previously hard-coded.
        keep_expression_matrix: Forwarded to ``Panda``. The default matches
            the value this function previously hard-coded.

    Returns:
        The ``Panda`` instance, or ``None`` when neither input is usable.
    """
    if dataframes is None:
        dataframes = {}
    if filepaths is None:
        filepaths = {}

    path_values = list(filepaths.values())
    # Fewer than three paths, or exactly three with a blank one, cannot
    # describe the expression/motif/ppi trio.
    if len(path_values) < 3:
        paths_usable = False
    elif len(path_values) == 3:
        paths_usable = not any(value == "" for value in path_values)
    else:
        paths_usable = True

    panda_kwargs = dict(
        modeProcess=mode,
        alpha=alpha,
        save_memory=False,
        precision=precision,
        remove_missing=remove_missing,
        keep_expression_matrix=keep_expression_matrix,
    )
    if len(dataframes) > 0:
        return Panda(dataframes['expression'], dataframes['motif'], dataframes['ppi'], **panda_kwargs)
    if paths_usable:
        return Panda(filepaths["expression"], filepaths["motif"], filepaths["ppi"], **panda_kwargs)
    return None
def create_puma_obj(dataframes=None, filepaths=None, alpha=0.1, mode="union", precision="double", keep_expression_matrix=True):
    """Build a ``Puma`` object from in-memory tables or from file paths.

    ``dataframes`` takes priority over ``filepaths``. When both are supplied
    only the ``dataframes`` object is constructed; the result is the same as
    before, but the discarded ``filepaths``-based object is no longer built.
    The PPI argument of ``Puma`` is always passed as ``None``.

    Args:
        dataframes: Mapping with ``'expression'``, ``'motif'`` and ``'mir'``
            entries. ``None`` is treated as an empty mapping.
        filepaths: Mapping with the same three keys holding file paths.
            ``None`` is treated as an empty mapping.
        alpha: Forwarded to ``Puma`` as ``alpha``.
        mode: Forwarded to ``Puma`` as ``modeProcess``.
        precision: Forwarded to ``Puma`` as ``precision``.
        keep_expression_matrix: Forwarded to ``Puma``.

    Returns:
        The ``Puma`` instance, or ``None`` when both mappings are empty.
    """
    if dataframes is None:
        dataframes = {}
    if filepaths is None:
        filepaths = {}

    if len(dataframes) > 0:
        return Puma(dataframes['expression'], dataframes['motif'], None, dataframes['mir'], alpha=alpha, modeProcess=mode, precision=precision, keep_expression_matrix=keep_expression_matrix, save_tmp=False)
    if len(filepaths) > 0:
        # Only the path-based call passes save_memory explicitly, as before.
        return Puma(filepaths["expression"], filepaths["motif"], None, filepaths["mir"], save_memory=False, alpha=alpha, modeProcess=mode, precision=precision, keep_expression_matrix=keep_expression_matrix, save_tmp=False)
    return None
def run_lioness(dataframes=None, input_mode='PANDA', alpha=0.1, mode='union', precision='double', start=0, end=0):
    """Run LIONESS on a PANDA, PUMA or co-expression network and tabulate the result.

    Output printed by the netZooPy calls is captured with ``Capturing`` and
    returned instead of being written to the console. Results are saved by
    the LIONESS objects under ``<cwd>/results/lioness``, which is created if
    needed.

    Args:
        dataframes: Mapping of inputs. This function reads ``"motif"``,
            ``"expression"`` and (PANDA mode) ``"ppi"`` from it and forwards
            the whole mapping to ``create_panda_obj`` / ``create_puma_obj``.
            ``None`` is treated as an empty mapping.
        input_mode: ``"PANDA"``, ``"PUMA"`` or ``"Coexpression"``.
        alpha: Forwarded to the network and LIONESS constructors.
        mode: Forwarded as the processing mode (``modeProcess``).
        precision: Forwarded to the network and LIONESS constructors.
        start: Forwarded to ``Lioness`` / ``LionessPuma`` as ``start``.
        end: Forwarded to ``Lioness`` / ``LionessPuma`` as ``end``.

    Returns:
        tuple: ``(lioness_obj_df, lioness_adj_mat_df, output)`` where
        ``output`` is the list of captured stdout lines. Both frames are
        ``None`` when ``input_mode`` is not recognised or when an exception
        is raised during processing (the exception is printed).

        NOTE(review): in PANDA mode the three individually guarded stages
        return ``{'status': 'failed', 'reason': ...}`` instead of a tuple.
        That pre-existing contract is kept here; callers that unpack three
        values must handle it.
    """
    if dataframes is None:
        dataframes = {}
    # Bare attribute holder passed as ``self`` so Panda.processData can be
    # called without constructing a Panda object. The attributes read back
    # below are expected to be set by that call.
    t = type('test', (object,), {})()
    output = []
    # Pre-bind the results so an unrecognised input_mode returns
    # (None, None, []) instead of raising UnboundLocalError at the return.
    lioness_obj_df, lioness_adj_mat_df = None, None
    lioness_results_folder = os.path.join(os.getcwd(), "results", "lioness")
    os.makedirs(lioness_results_folder, exist_ok=True)

    try:
        if input_mode == "PANDA":

            with Capturing() as output:
                try:
                    # remove_missing / keep_expression_matrix are fixed inside
                    # create_panda_obj; passing them here raised TypeError.
                    panda_obj = create_panda_obj(dataframes=dataframes, alpha=float(alpha), mode=mode, precision=precision)
                except Exception:
                    return {'status': 'failed', 'reason': 'PANDA_ANALYSIS_ERROR'}

                print("panda_obj:", panda_obj)
                try:
                    lioness_obj = Lioness(panda_obj, save_dir=lioness_results_folder, start=start, end=end, save_fmt="aaa", alpha=float(alpha), precision=precision)
                except Exception:
                    return {'status': 'failed', 'reason': 'LIONESS_ANALYSIS_ERROR'}

                try:
                    Panda.processData(self=t, modeProcess=mode, motif_file=dataframes["motif"], expression_file=dataframes["expression"], ppi_file=dataframes["ppi"], remove_missing=False, keep_expression_matrix=False)
                except Exception:
                    return {'status': 'failed', 'reason': 'PANDA_ANALYSIS_ERROR'}

                W = t.motif_matrix_unnormalized
                tfs = t.unique_tfs
                genes = t.gene_names
                # Flatten the TF x gene motif matrix into (tf, gene, motif) rows.
                W_df = pd.DataFrame(W, index=tfs, columns=genes)
                W_df = W_df.stack().reset_index().rename(columns={'level_0': 'tf', 'level_1': 'gene', 0: 'motif'})
                lioness_obj_df = pd.DataFrame(lioness_obj.export_lioness_results)
                print(lioness_obj_df)
                lioness_obj_df.columns = ["tf", "gene", "force"]
                # Column assignment aligns on the row index of both frames.
                lioness_obj_df["motif"] = W_df['motif']
                lioness_obj_df = lioness_obj_df[["tf", "gene", "motif", "force"]]
                adj_matrix = create_adj_matrix_from_rows_df(df=lioness_obj_df, gene_names=genes, unique_tfs=tfs, server_name="lioness")
                lioness_adj_mat_df = pd.DataFrame(adj_matrix, index=tfs, columns=genes)
        elif input_mode == "PUMA":
            # puma.py requires mir to be a filepath

            with Capturing() as output:
                puma_obj = create_puma_obj(dataframes=dataframes, alpha=alpha, mode=mode, precision=precision, keep_expression_matrix=True)
                lioness_obj = LionessPuma(puma_obj, save_dir=lioness_results_folder, start=start, end=end, save_fmt="aaa", alpha=float(alpha), precision=precision)
                Panda.processData(self=t, modeProcess=mode, motif_file=dataframes["motif"], expression_file=dataframes["expression"], ppi_file=None, remove_missing=False, keep_expression_matrix=False)
            tfs = t.unique_tfs
            genes = t.gene_names
            lioness_matrix = lioness_obj.export_lioness_results
            if not isinstance(lioness_matrix, np.ndarray):
                # Previously this surfaced as an unbound-variable error;
                # fail with an explicit message (handled by the outer except).
                raise TypeError("LionessPuma.export_lioness_results is not a numpy array")
            lioness_obj_df = pd.DataFrame(lioness_matrix, columns=["tf", "gene", "motif", "force"])
            adj_matrix = create_adj_matrix_from_rows_df(df=lioness_obj_df, gene_names=genes, unique_tfs=tfs, server_name="lioness")
            lioness_adj_mat_df = pd.DataFrame(adj_matrix, index=tfs, columns=genes)
        elif input_mode == "Coexpression":

            with Capturing() as output:
                panda_obj = create_panda_obj(dataframes=dataframes, alpha=float(alpha), mode=mode, precision=precision)
                lioness_obj = Lioness(panda_obj, save_dir=lioness_results_folder, start=start, end=end, save_fmt="aaa", alpha=float(alpha), precision=precision)
            # The columns are really gene1/gene2/force; they are labelled
            # tf/gene/force just to make the frontend work.
            fake_col_names = ["tf", "gene", "force"]
            lioness_obj_df = pd.DataFrame(lioness_obj.export_lioness_results)
            lioness_obj_df.columns = fake_col_names
            lioness_obj_df = lioness_obj_df[["tf", "gene", "force"]]
            # Second LIONESS run, this time requesting the gene_targeting output.
            lioness_adj_mat_df = Lioness(panda_obj, save_dir=lioness_results_folder, start=start, end=end, save_fmt="aaa", alpha=float(alpha), precision=precision, output='gene_targeting').export_lioness_results
    except Exception as e:
        print('###run_lioness_exception###')
        print(e)
        lioness_obj_df, lioness_adj_mat_df = None, None
    return lioness_obj_df, lioness_adj_mat_df, output
def main():
    """Reproduce the issue: run LIONESS in PUMA mode on the toy data set.

    Reads the toy expression file via ``read_expression_file``, loads the
    extra inputs required by the selected ``input_mode`` and calls
    ``run_lioness``. The inputs are expected under ``/home/``.

    Returns:
        ``None`` after a run, or ``{'status': 'failed', 'reason':
        'INVALID_INPUT_MODE_ARGUMENT'}`` when ``input_mode`` is not one of
        ``"PANDA"``, ``"PUMA"`` or ``"Coexpression"``.
    """
    folder = '/home/'
    lioness_input_filepaths = {
        "expression": folder + 'ToyExpressionData.csv',
        "motif": folder + 'ToyMotifData.csv',
        "mir": folder + 'ToyMiRList.csv',
        'ppi': folder + 'ToyPPIData.csv',
    }
    mode = 'union'
    precision = 'double'
    start = 1
    end = 1
    sample_mode = 'sample_num'
    input_mode = "PUMA"
    alpha = 0.1
    sep = ','

    dataframes = {
        'expression': None,
        'motif': None,
        'ppi': None,
        'mir': None,
    }
    dataframes['expression'] = read_expression_file(filepath=lioness_input_filepaths['expression'], header='infer', server='LIONESS', sample_mode=sample_mode)

    if input_mode == "PANDA":
        dataframes.update({
            'motif': pd.read_csv(lioness_input_filepaths['motif'], sep=sep, header=None),
            'ppi': pd.read_csv(lioness_input_filepaths['ppi'], sep=sep, header=None),
        })
    elif input_mode == "PUMA":
        # puma.py requires mir to be a filepath, so it is passed through
        # as a path rather than loaded into a DataFrame.
        dataframes.update({
            'motif': pd.read_csv(lioness_input_filepaths['motif'], sep=sep, header=None),
            'mir': lioness_input_filepaths['mir'],
        })
    elif input_mode == "Coexpression":
        pass
    else:
        return {'status': 'failed', 'reason': 'INVALID_INPUT_MODE_ARGUMENT'}

    lioness_obj_df, lioness_adj_mat_df, output = run_lioness(dataframes=dataframes, input_mode=input_mode, alpha=alpha, mode=mode, precision=precision, start=start, end=end)


# Guarded so that importing this module does not launch the whole analysis.
if __name__ == "__main__":
    main()

talkhanz avatar Oct 25 '22 15:10 talkhanz

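@violafanfani, @talkhanz reports that `Puma` and `LionessPuma` differ at the two locations linked below, which appears to be what triggers the missing `sorted_index` argument error: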

https://github.com/netZoo/netZooPy/blob/master/netZooPy/puma/puma.py#L319

https://github.com/netZoo/netZooPy/blob/master/netZooPy/lioness/lioness_for_puma.py#L139

marouenbg avatar Oct 25 '22 17:10 marouenbg

This should now be working

violafanfani avatar Nov 30 '22 21:11 violafanfani