Help with a test

Generic algorithms that could be implemented in almost any language, e.g. matrix operations and FFT.

Help with a test

Postby Gusvinhal » Mon May 15, 2017 12:27 pm

Hello everybody!

I'm trying to do a simple test and I can not understand what I'm doing wrong.

Basically, the host creates a 5x5 matrix of integers filled with zeros, then sends it to the shared memory (using a buffer). The core reads this matrix, writes other values into it and sends it back. The host reads it back and displays the values.

The problem is: when the matrix size is less than 5x5, the core can write the new values. When the matrix size is greater than or equal to 5x5, the core can not write the new values and the result is the matrix filled with zero.

What am I doing wrong?

The code is shown below.

Host side:

Code: Select all
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>

#include <e-hal.h>

int main(int argc, char *argv[])
{
   e_platform_t platform;
   e_epiphany_t dev;
   e_mem_t   mbuf;
   int rc, i, j;
       int vet[5][5];

        for(i = 0; i < 5; i++)
            for(j = 0; j < 5; j++)
                 vet[i][j] = 0;


   e_set_loader_verbosity(H_D0);
   e_set_host_verbosity(H_D0);

   e_init(NULL);
   e_reset_system();
   e_get_platform_info(&platform);

   rc = e_alloc(&mbuf, 0x00000000, sizeof(vet));
   
   if (rc != E_OK) {
      fprintf(stderr, "Failed to allocate shared memory. Error is %s\n",
            strerror(errno));
      return EXIT_FAILURE;
   }

   if ( E_OK != e_open(&dev, 0, 0, 1, 1) ) {
      fprintf(stderr, "Failed open device\n");
      return EXIT_FAILURE;
   }

        e_reset_group(&dev);

        if ( E_OK != e_load("e_mat.elf", &dev, 0, 0, E_FALSE) ) {
      fprintf(stderr, "Failed to load e_mat.elf\n");
      return EXIT_FAILURE;
   }

        printf("\n\nValues before Core: \n\n");

        for(i = 0; i < 5; i++)
            {
              for(j = 0; j < 5; j++)
                   {
                     printf("%d ",vet[i][j]);
                   }
               printf("\n");
            }

   e_write(&mbuf, 0, 0, 0x00000000, &vet, sizeof(vet));
   
   e_start_group(&dev);

   usleep(10000);

   e_read(&mbuf, 0, 0, 0x00000000, &vet, sizeof(vet));

   printf("\n\nValues after Core: \n\n");
   
   for(i = 0; i < 5; i++)
   {
      for(j = 0; j < 5; j++)
      {
         printf("%d ", vet[i][j]);
      }
      printf("\n");
   }

   e_close(&dev);
   
   e_free(&mbuf);
   
   e_finalize(); 

   return 0;
}


Core side:

Code: Select all
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "e_lib.h"

/*
 * Core-side program: calls e_read() with sizeof(vet) bytes into the local
 * 5x5 array vet, overwrites every element with the values 1..25 in
 * row-major order, then calls e_write() with the same arguments.
 * The values obtained by e_read() are never used; they are all overwritten.
 */
int main(void) {
   
        /* NOTE(review): emem is declared here but never assigned before its
         * address is handed to e_read()/e_write() below. */
        e_memseg_t        emem;

   /* cont is the next value to store; it starts at 1. */
   int vet[5][5], cont = 1;
   int i, j;

   e_read(&emem, &vet, 0, 0, NULL, sizeof(vet));
      
   /* Fill vet row by row with consecutive integers. */
   for(i = 0; i < 5; i++)
   {
      for(j = 0; j < 5; j++)
      {
         vet[i][j] = cont;
         cont++;
      }
   }
      
      e_write((void*)&emem, &vet, 0, 0, NULL, sizeof(vet));

   return EXIT_SUCCESS;
}




Thank you very much!
Gustavo.
Gusvinhal
 
Posts: 13
Joined: Thu Apr 06, 2017 6:35 pm

Re: Help with a test

Postby jar » Tue May 16, 2017 12:19 am

This is easy enough to do with the COPRTHR SDK...

host code (main.c):
Code: Select all
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "coprthr.h"
#include "coprthr_cc.h"
#include "coprthr_thread.h"

#define N 5
#define ECORES 1

typedef struct {
   int n;
   int* vet;
} arg_t;

int main(void)
{
   int nthr = ECORES;
   int i, j;

   // open device and get thread function
   int dd = coprthr_dopen(COPRTHR_DEVICE_E32, COPRTHR_O_STREAM);
   coprthr_program_t prg = coprthr_cc_read_bin("./device.e32",0);
   coprthr_sym_t thr = (coprthr_sym_t)coprthr_getsym(prg, "tfunc");

   // allocate vet memory N x N
   size_t vet_size = N * N * sizeof(int);
   size_t arg_size = sizeof(arg_t);
   coprthr_mem_t vet_mem = coprthr_dmalloc(dd, vet_size, 0);
   coprthr_mem_t arg_mem = coprthr_dmalloc(dd, arg_size, 0);
   int* vet = (int*)coprthr_memptr(vet_mem, 0);
   arg_t* arg = (arg_t*)coprthr_memptr(arg_mem, 0);
   *arg = (arg_t){ .n = N, .vet = vet };

   // initialize shared memory
   for(i = 0; i < N; i++)
      for(j = 0; j < N; j++)
         vet[i * N + j] = 0;

   // creating a joinable context
   coprthr_attr_t attr;
   coprthr_attr_init(&attr);
   coprthr_attr_setdetachstate(&attr, COPRTHR_CREATE_JOINABLE);
   coprthr_attr_setdevice(&attr, dd);

   // launch on epiphany
   coprthr_td_t td;
   coprthr_ncreate(nthr, &td, &attr, thr, (void*)&arg_mem);
   coprthr_attr_destroy(&attr);

   // waiting until complete
   void* status;
   coprthr_join(td, &status);

   // printing results
   printf("vet:\n");
   for(i = 0; i < N; i++) {
      printf("vet[%d][0-%d] = ", i, N);
      for(j = 0; j < N; j++) {
         printf("%5d",vet[i * N + j]);
      }
      printf("\n");
   }

   // cleanup
   coprthr_dfree(dd, arg_mem);
   coprthr_dfree(dd, vet_mem);

   coprthr_dwait(dd);
   coprthr_dclose(dd);
}


device code (device.c):
Code: Select all
#include <coprthr.h>

typedef struct {
   int n;
   int* vet;
} arg_t;

/*
 * Thread entry point: fills the n x n matrix referenced by args->vet with
 * consecutive integers starting at 1, in row-major order.
 */
void __entry tfunc(arg_t* args)
{
   const int dim = args->n;
   int* out = args->vet;
   int next = 1;
   int row, col;

   for(row = 0; row < dim; ++row) {
      /* start of the current row inside the flat array */
      int* line = out + row * dim;

      for(col = 0; col < dim; ++col)
         line[col] = next++;
   }
}


Makefile:
Code: Select all
# Builds the host program (main.x) with $(CC) and the Epiphany device
# binary (device.e32) with coprcc from the COPRTHR SDK.
CCFLAGS += -O2 $(DEFS) -g

# Installation prefix of the COPRTHR SDK
COPRTHR = /usr/local/browndeer/coprthr2

INCS = -I. -I$(COPRTHR)/include
LIBS = -L$(COPRTHR)/lib -lcoprthr -lcoprthrcc

COPRCC_FLAGS =
COPRCC_DEFS = $(DEFS)
COPRCC_INCS =
COPRCC_LIBS = -L$(COPRTHR)/lib

TARGET = main.x device.e32

all: $(TARGET)

# Targets that do not name real files
.PHONY: all clean distclean install uninstall

.SUFFIXES:
.SUFFIXES: .c .o .x

# NOTE: every recipe line below must start with a TAB character; make
# rejects space-indented recipes with "missing separator".
main.x: main.c
	$(CC) $(CCFLAGS) -o main.x $(DEFS) $(INCS) main.c $(LIBS)

device.e32: device.c
	coprcc $(COPRCC_FLAGS) $(COPRCC_DEFS)  $(COPRCC_INCS) device.c $(COPRCC_LIBS) -o device.e32

clean: $(SUBDIRS)
	rm -f *.o
	rm -f $(TARGET)

distclean: clean


output (./main.x):
Code: Select all
COPRTHR-2-BETA (Anthem) build 20160630.1527
vet:
vet[0][0-5] =     1    2    3    4    5
vet[1][0-5] =     6    7    8    9   10
vet[2][0-5] =    11   12   13   14   15
vet[3][0-5] =    16   17   18   19   20
vet[4][0-5] =    21   22   23   24   25


Your original device code does not ever read the value of vet. It does not allocate thread local memory on core. It simply writes to shared memory from the Epiphany core. There is no parallelism in this example. The host code may be a bit overkill for what you are doing but hopefully it's more flexible.
User avatar
jar
 
Posts: 284
Joined: Mon Dec 17, 2012 3:27 am

Re: Help with a test

Postby Gusvinhal » Wed May 17, 2017 2:54 pm

Hi!

This code doesn't have any parallelism because I just want to test the communication between the Zynq and an Epiphany core =-] The code just transfers data to the core (via shared memory), changes the values and transfers them back.

Matrix with sizes less than 5x5, works. Matrix with sizes greater or equal 5x5 the core can not write the values in matrix.
Gusvinhal
 
Posts: 13
Joined: Thu Apr 06, 2017 6:35 pm

Re: Help with a test

Postby GreggChandler » Sun May 21, 2017 11:39 pm

It is important to remember that e_read() on the Epiphany core takes different parameters than e_read() on the host (ARM). In particular, to read from external memory on the Epiphany core, the "remote" parameter should be e_emem_config. In your Epiphany code, I don't see any initialization of "emem". I have included a slightly revised e_mat.c, which should now work. Although you did not identify your link script, I assumed internal, as otherwise your external memory choices would potentially conflict with newlib. If necessary, I can provide the Makefile I used to compile your programs.

e_mat.c
Code: Select all
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "e_lib.h"

/*
 * Revised core-side program. Each "#if 0==1" branch holds a line from the
 * original version; the condition is always false, so the preprocessor
 * drops those lines and compiles only the matching #else branch (if any).
 * The compiled code passes &e_emem_config to e_read()/e_write() and no
 * longer declares a local emem.
 */
int main(void) {
   
#if   0==1
        /* original declaration, excluded from compilation */
        e_memseg_t        emem;
#endif

   /* cont is the next value to store; it starts at 1. */
   int vet[5][5], cont = 1;
   int i, j;

#if   0==1
   e_read(&emem, &vet, 0, 0, NULL, sizeof(vet));
#else
   e_read(&e_emem_config, &vet, 0, 0, NULL, sizeof(vet));
#endif
     
   /* Fill vet row by row with 1..25. */
   for(i = 0; i < 5; i++)
   {
      for(j = 0; j < 5; j++)
      {
         vet[i][j] = cont;
         cont++;
      }
   }
     
#if   0==1
      e_write((void*)&emem, &vet, 0, 0, NULL, sizeof(vet));
#else
      e_write(&e_emem_config, &vet, 0, 0, NULL, sizeof(vet));
#endif

   return EXIT_SUCCESS;
}


Output:
Code: Select all

Values before Core:

0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0


Values after Core:

1 2 3 4 5
6 7 8 9 10
11 12 13 14 15
16 17 18 19 20
21 22 23 24 25
GreggChandler
 
Posts: 66
Joined: Sun Feb 12, 2017 1:56 am

Re: Help with a test

Postby Gusvinhal » Mon May 29, 2017 2:28 pm

GreggChandler wrote:It is important to remember that e_read() on the Epiphany core takes different parameters than e_read() on the host (ARM). In particular, to read from external memory on the Epiphany core, the "remote" parameter should be e_emem_config. In your Epiphany code, I don't see any initialization of "emem". I have included a slightly revised e_mat.c, which should now work. Although you did not identify your link script, I assumed internal, as otherwise your external memory choices would potentially conflict with newlib. If necessary, I can provide the Makefile I used to compile your programs.

e_mat.c
Code: Select all
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "e_lib.h"

/*
 * Revised core-side program (quoted). Each "#if 0==1" branch holds a line
 * from the original version; the condition is always false, so the
 * preprocessor drops those lines and compiles only the matching #else
 * branch (if any). The compiled code passes &e_emem_config to
 * e_read()/e_write() and no longer declares a local emem.
 */
int main(void) {
   
#if   0==1
        /* original declaration, excluded from compilation */
        e_memseg_t        emem;
#endif

   /* cont is the next value to store; it starts at 1. */
   int vet[5][5], cont = 1;
   int i, j;

#if   0==1
   e_read(&emem, &vet, 0, 0, NULL, sizeof(vet));
#else
   e_read(&e_emem_config, &vet, 0, 0, NULL, sizeof(vet));
#endif
     
   /* Fill vet row by row with 1..25. */
   for(i = 0; i < 5; i++)
   {
      for(j = 0; j < 5; j++)
      {
         vet[i][j] = cont;
         cont++;
      }
   }
     
#if   0==1
      e_write((void*)&emem, &vet, 0, 0, NULL, sizeof(vet));
#else
      e_write(&e_emem_config, &vet, 0, 0, NULL, sizeof(vet));
#endif

   return EXIT_SUCCESS;
}


Output:
Code: Select all

Values before Core:

0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0


Values after Core:

1 2 3 4 5
6 7 8 9 10
11 12 13 14 15
16 17 18 19 20
21 22 23 24 25


Hi!

I tried your code, but it didn't work.

I don't understand what you mean by "#if 0==1". Can you explain it to me?

I'm compiling using the makefile from the hello world example. Here it is:

Code: Select all
#!/bin/bash

set -e

ESDK=${EPIPHANY_HOME}
ELIBS="-L ${ESDK}/tools/host/lib"
EINCS="-I ${ESDK}/tools/host/include"
ELDF=${ESDK}/bsps/current/fast.ldf

# Run from the directory that contains this script so the relative src/
# and Debug/ paths below resolve no matter where the script is invoked.
SCRIPT=$(readlink -f "$0")
EXEPATH=$(dirname "$SCRIPT")
cd "$EXEPATH"

# Pick the compiler prefix unless the caller already set CROSS_COMPILE
# (an empty value counts as "set").
if [ -z "${CROSS_COMPILE+xxx}" ]; then
case $(uname -p) in
   arm*)
      # Use native arm compiler (no cross prefix)
      CROSS_COMPILE=
      ;;
      *)
      # Use cross compiler
      CROSS_COMPILE="arm-linux-gnueabihf-"
      ;;
esac
fi

# Both compilers write into Debug/; make sure it exists.
mkdir -p Debug

# Build HOST side application
# (EINCS/ELIBS are left unquoted on purpose: each holds a flag and a path
# that must be split into separate words.)
${CROSS_COMPILE}gcc src/mat.c -o Debug/mat.elf ${EINCS} ${ELIBS} -le-hal -le-loader -lpthread

# Build DEVICE side program
e-gcc -T "${ELDF}" src/e_mat.c -o Debug/e_mat.elf -le-lib


Gusvinhal
 
Posts: 13
Joined: Thu Apr 06, 2017 6:35 pm

Re: Help with a test

Postby GreggChandler » Tue May 30, 2017 3:00 am

I used "#if 0==1" as a way to show you the changes that I made to your program. Because 0 will never equal 1, the original "if" clause will not be compiled, and the #else clause will be compiled. Where there is no #else clause, the #if clause will be ignored. #if/#else/#endif are pre-processor directives. In essence, I removed your declaration of "emem" as it was not needed. I then changed your parameters to "e_read()" and "e_write()" from "emem" to "e_emem_config". When you make those changes, and correctly compile the program, the program will produce the output that I provided, that is, non-zero values.

I built the program directly on a parallella board using a Makefile (for use with make). You appear to be using a shell script, that is, "build.sh". Either way should work. To fix your build script, change the word "fast" on the 8-th line to "internal". The resulting line should read:
Code: Select all
ELDF=${ESDK}/bsps/current/internal.ldf
GreggChandler
 
Posts: 66
Joined: Sun Feb 12, 2017 1:56 am

Re: Help with a test

Postby Gusvinhal » Tue May 30, 2017 9:52 am

GreggChandler wrote:I used "#if 0==1" as a way to show you the changes that I made to your program. Because 0 will never equal 1, the original "if" clause will not be compiled, and the #else clause will be compiled. Where there is no #else clause, the #if clause will be ignored. #if/#else/#endif are pre-processor directives. In essence, I removed your declaration of "emem" as it was not needed. I then changed your parameters to "e_read()" and "e_write()" from "emem" to "e_emem_config". When you make those changes, and correctly compile the program, the program will produce the output that I provided, that is, non-zero values.

I built the program directly on a parallella board using a Makefile (for use with make). You appear to be using a shell script, that is, "build.sh". Either way should work. To fix your build script, change the word "fast" on the 8-th line to "internal". The resulting line should read:
Code: Select all
ELDF=${ESDK}/bsps/current/internal.ldf


It worked! Thanks!!! But what is the difference between
Code: Select all
ELDF=${ESDK}/bsps/current/internal.ldf
and
Code: Select all
 ELDF=${ESDK}/bsps/current/fast.ldf
?


What is the maximum size of vector that I can create? If I create a matrix of integers of size 100x100, the error comes back: the result is a matrix filled with 0. The shared memory has 32MB, right? What's the problem?

If I have a matrix in shared memory and each core will access a different region of that matrix, how does the e_read() function work? I ask because e_emem_config always refers to the base address of the shared memory, right?
Gusvinhal
 
Posts: 13
Joined: Thu Apr 06, 2017 6:35 pm

Re: Help with a test

Postby GreggChandler » Tue May 30, 2017 2:20 pm

The Epiphany SDK, or eSDK, installed on my Parallella (2016.11) ships with three separate .ldf files: internal.ldf, fast.ldf, and legacy.ldf. Table 5.3 (page 49 in REV 5.13.09.10) of the Epiphany eSDK Manual (found here: http://adapteva.com/docs/epiphany_sdk_ref.pdf ) defines the differences between the various .ldf files. Sections 5.5 and 5.6 give a brief explanation of why these different .ldf files are necessary. In summary, the Epiphany III supports two different types of memory: core memory, and external shared memory. (In the table these are referred to as "Internal SRAM" and "External SDRAM".) The difference between the "internal.ldf" and "fast.ldf" is where the various parts of your program are located. Understanding the differences is essential to writing a good Parallella/Epiphany program. I would discourage the use of the installed "legacy.ldf" as it appears broken.

In your program the "vet" array is allocated on the stack. That will severely limit the maximum size that your program will support. An integer array of 100x100 elements will require 10,000x4 bytes of memory. Since the "Internal SRAM" is limited to 32KB, your array will not fit in internal memory--much less on the stack. This means you will need to return to the "fast.ldf" model. When you return to "fast", you must be careful not to allocate your external memory buffer where other program memory components are located. The art of Epiphany programming lies in correctly managing these resources efficiently.

There are a host of other issues that you will need to consider in writing your program, not the least of which is the performance of "External SDRAM" (it is slower than "Internal SRAM",) and the "weak memory-order model" (it is non-standard and non-intuitive, although it only affects external memory). You also might want to review Section 4 (page 17 of REV 14.03.11) of the Epiphany Architecture manual, found here: http://www.adapteva.com/docs/epiphany_arch_ref.pdf .

It is my understanding that sample applications have been provided by Adapteva and/or community members to address the issues involved in writing an application such as yours. I am not familiar with the samples, as my programs have been written from scratch. Additionally, you might consider using some of the helper libraries that may assist in managing external/internal memory. Again, I am not generally familiar with them, as I have built my own. If you choose to build from scratch, don't under-estimate the work involved. If you choose to use a library, remember that will add more to the learning curve and consider support. If you choose to modify a sample, carefully consider the quality of the code before you modify it.
GreggChandler
 
Posts: 66
Joined: Sun Feb 12, 2017 1:56 am

Re: Help with a test

Postby Gusvinhal » Tue May 30, 2017 2:41 pm

GreggChandler wrote:In your program the "vet" array is allocated on the stack. That will severely limit the maximum size that your program will support. An integer array of 100x100 elements will require 10,000x4 bytes of memory. Since the "Internal SRAM" is limited to 32KB, your array will not fit in internal memory--much less on the stack. This means you will need to return to the "fast.ldf" model. When you return to "fast", you must be careful not to allocate your external memory buffer where other program memory components are located. The art of Epiphany programming lies in correctly managing these resources efficiently.


As the internal memory is too small, I'm trying to create the array in shared memory. Is there any way I can work on array data in shared memory without having to move it to core memory (with the e_read command)?
Gusvinhal
 
Posts: 13
Joined: Thu Apr 06, 2017 6:35 pm

Re: Help with a test

Postby GreggChandler » Tue May 30, 2017 9:31 pm

You can start by searching for the "SECTION()" attribute macro. It is described in one of the manual sections that I suggested you study. SECTION() will facilitate allocating a variable in any section of memory, including the "external SDRAM". The problem with using SECTION() is that you will need to determine where the data is stored so that your ARM/host program can initialize/access the data. Some libraries take care of such things for you. I wrote my own library that supports allocating/freeing external memory from either the ARM or Epiphany cores. I believe that e_shm is available with the eSDK, however, I haven't used it. I believe that e_shm can be used to manage external memory. I used my own shared memory libraries.

The other consideration that I mentioned in a prior post is the "weak memory-order model". I have successfully written code that accesses external memory directly, although I don't use e_read() or e_write(). From the Epiphany cores it is very straightforward to access external memory via pointers. I use e_alloc() to access the external memory from the ARM as well. The tricky part is that the pointer values on the ARM side do not match the pointer values on the Epiphany side. The "weak memory-order model" implies that no matter how you access external memory, you must be careful when ordering reads and writes.
GreggChandler
 
Posts: 66
Joined: Sun Feb 12, 2017 1:56 am


Return to Algorithms

Who is online

Users browsing this forum: No registered users and 1 guest