the script has been updated and optimized

b915d1d8 · Emma Oceguera-Conchas · 511d9a8a · b915d1d8 · b915d1d8 · b915d1d8
Commit b915d1d8 authored 1 year ago by Emma Oceguera-Conchas
--- a/1.rasterize_mask_and_fragmentation_5m.py
+++ b/1.rasterize_mask_and_fragmentation_5m.py
@@ -76,7 +76,7 @@ def main(sbbox):
    # Define GDAL command for fragmentation rasterization
    cmd_frag = """gdal_rasterize -tr 5 5 -co "COMPRESS=DEFLATE" -burn 9 -ot Byte -te {0} {1} {2} {3} \
        -sql "select st_intersection(f.geom, st_geomfromtext('POLYGON (({0} {1}, {2} {1}, {2} {3}, {0} {3}, {0} {1}))', 3035)), \
-        f.frag_code, f.id from fragmentation.forest_clc2018_v2020_clip_ms f where \
+        f.frag_code, f.id from fragmentation.estonia_forest f where \
        st_intersects(f.geom, st_geomfromtext('POLYGON (({0} {1}, {2} {1}, {2} {3}, {0} {3}, {0} {1}))', 3035)) \
        and f.frag_code = 999"\
        PG:"host=localhost user=postgres dbname=NaturaConnect-Connectivity password=07089452" {4}""".format(xmin, ymin, xmax, ymax, outname_frag)
@@ -84,7 +84,7 @@ def main(sbbox):
    # Define GDAL command for mask rasterization
    cmd_mask = """gdal_rasterize -tr 5 5 -co "COMPRESS=DEFLATE" -burn 1 -ot Byte -te {0} {1} {2} {3} \
        -sql "select st_intersection(f.geom, st_geomfromtext('POLYGON (({0} {1}, {2} {1}, {2} {3}, {0} {3}, {0} {1}))',3035)), \
-        f.id from eu_ref_grif.eu_mask_single f where \
+        f.id from fragmentation.mask_estonia f where \
        st_intersects(f.geom, st_geomfromtext('POLYGON (({0} {1}, {2} {1}, {2} {3}, {0} {3}, {0} {1}))', 3035))" \
        PG:"host=localhost user=postgres dbname=NaturaConnect-Connectivity password=07089452" {4}""".format(xmin, ymin, xmax, ymax, outname_mask)
@@ -102,9 +102,9 @@ def main(sbbox):
 # Define the path to the input shapefile and output paths for mask and fragmentation rasters
-inshape = r"I:\biocon\Emmanuel_Oceguera\projects\2023_09_Fragmentation\data\eu_reference_grid\europe_100km.shp" # to get the extent
+inshape = r"I:\biocon\Emmanuel_Oceguera\projects\2023_09_Fragmentation\data\mask\estonia_10km.shp" # to get the extent
-outpath_frag = r"S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\frag_raster"
+outpath_frag = r"I:\biocon\Emmanuel_Oceguera\projects\2023_09_Fragmentation\estonia_forest_fragmentation\frag_raster"
-outpath_mask  =  r"S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\land_raster"
+outpath_mask  =  r"I:\biocon\Emmanuel_Oceguera\projects\2023_09_Fragmentation\estonia_forest_fragmentation\land_raster"
 # Define a command to get the total extent of the shapefile
 cmd = 'ogrinfo -al -so ' + inshape    

--- a/10.fragmentation_indicatior_cal.py
+++ b/10.fragmentation_indicatior_cal.py
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Nov 11 12:39:02 2023
+@author: no67wuwu
+"""
+import os
+import numpy as np
+import pandas as pd
+from glob import glob
+import MacPyver as mp
+from multiprocessing import Pool
+# fragmentation function from Aurora
+def frag_ind(s):
+    area = ((sur_radius*2)+1)**2
+    a = map(lambda x: x**2, s)
+    a = sum(a)*1.0/area
+    return a
+# path to files:
+tile_path = r"S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\result_grass\frag_unique_masled_tr1000"
+# path to textfile / split the file / result from the grass report function
+report_path = r"S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\result_grass\tables\report_unique_areas.file"
+df_report = pd.read_csv(report_path, sep='|', skiprows=15 ,skipfooter=3, names=['a', 'frag_id', 'b','area','c'], thousands=',', engine='python')
+df_report = df_report.loc[:, ['frag_id', 'area']]
+outpath = r'S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\result\frag_ind'
+log = open(outpath+'\\frag_ind.log', 'w')
+def get_tiles(tile):
+    ''' get surrounding tiles '''
+    name = tile.split(os.sep)[-1]
+    y_top = int(name.split('-')[1])-1
+    y_bot = y_top + 2
+    x_left = int(name.split('-')[2].split('.')[0])-1
+    x_right = x_left + 2
+    #all possible surrounding tiles
+    sur_tiles = ['fr_cl_tile-{}-{}.tif'.format(('000'+str(y))[-3:], ('000'+str(x))[-3:]) 
+            for y in range(y_top, y_bot+1)  
+                for x in range(x_left, x_right+1) ]
+    #position of the tiles               
+    pos_dic_tiles = {}
+    pos = ['0,0','0,1','0,2',
+           '1,0','1,1','1,2',
+           '2,0','2,1','2,2']
+    pos_count = 0
+    for st in sur_tiles:
+        pos_dic_tiles[st] = pos[pos_count]
+        pos_count +=1
+    #get all existing tiles
+    sur_tile_paths = [x for x in tiles for y in sur_tiles if y in x]
+    # create empty array
+    array = np.zeros([300,300])
+    array.fill(-1)
+    # fill array with data
+    for stp in sur_tile_paths:
+        # get position in merge array
+        stp_name = stp.split(os.sep)[-1]
+        sub_rast = mp.raster.tiff.read_tif(stp)
+        # Check the shape of the sub_rast
+        if sub_rast.shape != (100, 100):
+            print(f"Unexpected shape for {stp_name}: {sub_rast.shape}")
+            continue  # Skip this tile or handle the error as needed
+        # fill position based on the position dic
+        ymin = int(pos_dic_tiles[stp_name].split(',')[0]) * 100
+        ymax = ymin +100
+        xmin = int(pos_dic_tiles[stp_name].split(',')[1]) * 100
+        xmax = xmin + 100
+        array[ymin:ymax, xmin:xmax] = sub_rast
+    return array
+    # # create empty array
+    # array = np.zeros([300,300])
+    # array.fill(-1)
+    # #fill array with data
+    # for stp in sur_tile_paths:
+    #     #get position in merge array
+    #     stp_name = stp.split(os.sep)[-1]
+    #     sub_rast = mp.raster.tiff.read_tif(stp)
+    #     # fill position based on the position dic
+    #     ymin = int(pos_dic_tiles[stp_name].split(',')[0]) * 100
+    #     ymax = ymin +100
+    #     xmin = int(pos_dic_tiles[stp_name].split(',')[1]) * 100
+    #     xmax = xmin + 100
+    #     array[ymin:ymax, xmin:xmax] = sub_rast
+    # return array
+def bbox(x,y, sur_radius):
+    '''get the bounding box'''
+    xmin = x - sur_radius
+    xmax = x + sur_radius + 1
+    ymin = y - sur_radius
+    ymax = y + sur_radius + 1
+    return xmin, xmax, ymin, ymax
+def main(tile):
+    # get surrownding tiles
+    raster = get_tiles(tile)
+    orig_raster = mp.raster.tiff.read_tif(tile)
+    out_count_area = np.zeros(orig_raster.shape)
+    out_orig_area = np.zeros(orig_raster.shape)
+    frag_ids = np.unique(raster[:,:]) 
+    #frag_ids = np.unique(raster[95:205, 95:205]) 
+    frag_ids = np.delete(frag_ids, np.where(frag_ids==0))
+    df_rep_subset = df_report.loc[df_report['frag_id'].isin(frag_ids)]
+    for x in range(100,200):
+        for y in range(100, 200):
+            if raster[x,y]!=0:
+                # get surrounding cells
+                xmin, xmax, ymin, ymax = bbox(x, y, sur_radius)
+                s_cells = raster[ xmin : xmax, ymin : ymax]
+                window_ids, window_counts = np.unique(s_cells, return_counts=True)
+                if 0 in window_ids:
+                    window_ids = window_ids[1:]
+                    window_counts = window_counts[1:]
+                #calc fragmentation based on pixel count in moving window
+                frag_indi_counts = frag_ind(window_counts)
+                #calc fragmentation based on orig areas
+                df_window = df_rep_subset.loc[df_rep_subset['frag_id'].isin(window_ids)]
+                df_window_array = np.array(df_window['area']/(1000*1000)).astype(int)
+                frag_indi_orig = frag_ind(df_window_array)
+                out_count_area[x-100,y-100] = int(frag_indi_counts + .5)
+                out_orig_area[x-100,y-100] = int(frag_indi_orig + .5)
+    # considering just the pixel in the window    
+    outraster_counts = r'S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\result\window_count3' + os.sep + tile.split(os.sep)[-1]
+    mp.raster.tiff.write_tif(tile,outraster_counts, out_count_area, 1)
+    # considering the original area from the id    
+    outraster_orig_area = r'S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\result\orig_area3' + os.sep + tile.split(os.sep)[-1]
+    mp.raster.tiff.write_tif(tile,outraster_orig_area, out_orig_area, 1)
+    return 'tile: {} written'.format(tile.split(os.sep))
+tiles = glob(tile_path + '\*.tif')
+sur_radius = 5 #radius of cells for surrounding the center_pixel
+if __name__=='__main__':
+    pool = Pool(20)
+    for bar in pool.imap_unordered(main, tiles):
+        log.write(bar)
+        log.flush()
+    log.close() 
--- a/11.convert_result_to_vrt.py
+++ b/11.convert_result_to_vrt.py
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Nov 15 09:51:27 2023
+@author: no67wuwu
+"""
+from osgeo import gdal
+import os
+# Directory containing TIFF files
+directory = r'S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\result\window_count3'
+# Output VRT file path
+vrt_path = r'S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\result\window_count3.vrt'
+# Get all TIFF files in the directory
+tiff_files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.tif')]
+# Create VRT
+vrt = gdal.BuildVRT(vrt_path, tiff_files)
+vrt = None  # This will save and close the VRT file
--- a/3.generate_list_valid_tiles.py
+++ b/3.generate_list_valid_tiles.py
@@ -3,6 +3,7 @@
 Created on Mon Oct 23 16:05:56 2023
 @author: no67wuwu
+work: generate a list of a valid tiles from the combined tiles in the step 2
 """
 import os
@@ -31,14 +32,13 @@ def get_stdout(cmd, verbose=False):
            stdout.append(line)
        if verbose:
            print(line),
-        if line == '' and p.poll() != None: #p.poll is checking if the process is still running 
+        if line == '' and p.poll() != None: #p.poll is checking if the process is still running
-                                            #is it is running it returns None
            break
    return stdout
 # Define the pathe where all the combined files are
 # path = r'S:\Emmanuel_OcegueraConchas\Fragmentation\frag_and_land\*.tif'
-path = r"S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\frag_land_combined\*.tif"
+path = r"I:\biocon\Emmanuel_Oceguera\projects\2023_09_Fragmentation\estonia_forest_fragmentation\land_frag_combined\*.tif"
 # get the list of all the files
 files = glob(path)
@@ -47,7 +47,7 @@ files = glob(path)
 cmd = 'gdalinfo -stats %s'
 # Open a text file in writing modus. This file will be used to store the paths of the tif files that meet cetain criteria
-vrt_imgs = open(r'S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\valid_tiles_combined.txt', 'w')
+vrt_imgs = open(r'I:\biocon\Emmanuel_Oceguera\projects\2023_09_Fragmentation\estonia_forest_fragmentation\valid_tiles.txt', 'w')
 for f in files:
    stats = get_stdout(cmd %f)

--- a/4.create_vrt_from_valid_tiles.py
+++ b/4.create_vrt_from_valid_tiles.py
@@ -11,8 +11,8 @@ work: create the vrt from the valid tiles that has been created in the step 4
 from osgeo import gdal
 # Define the paths
-input_txt_file = r"S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\valid_tiles_test2.txt" # Replace with the path to your text file
+input_txt_file = r"I:\biocon\Emmanuel_Oceguera\projects\2023_09_Fragmentation\estonia_forest_fragmentation\valid_tiles.txt" # Replace with the path to your text file
-output_vrt_file = r"S:\Emmanuel_OcegueraConchas\eu_fragmentation_forest\valid_tiles_test2.vrt"     # Replace with the desired VRT file path
+output_vrt_file = r"I:\biocon\Emmanuel_Oceguera\projects\2023_09_Fragmentation\estonia_forest_fragmentation\valid_tiles.vrt"     # Replace with the desired VRT file path
 # Read the list of image file paths from the text file
 with open(input_txt_file, "r") as file:

--- a/5.load_vrt_to_grass.py
+++ b/5.load_vrt_to_grass.py
@@ -10,7 +10,7 @@ input_vrt = "S:/Emmanuel_OcegueraConchas/eu_fragmentation_forest/valid_tiles_tes
 # Define the name of the output raster layer in GRASS GIS
-output_raster = "eu_forest_frag_test"
+output_raster = "test"

--- a/6.clumps_full_raster_and_report_GRASS.py
+++ b/6.clumps_full_raster_and_report_GRASS.py
+import grass.script as gscript
+# Use the GRASS overwrite environment variable to allow overwriting of existing data
+gscript.use_temp_region()  # Ensures that the region changes are temporary
+try:
+    # Set the Region of Interest to match the raster map that is being used
+    gscript.run_command('g.region', raster='test@PERMANENT')
+    # Calculate Clumps
+    gscript.run_command('r.clump', input='test@PERMANENT', output='test_clump', overwrite=True)
+    # Specify the full path for the output report file
+    report_output_path = 'S:/Emmanuel_OcegueraConchas/eu_fragmentation_forest/test.txt'
+    # Generate a Report with Unique Area and Units and save it to the specified path
+    gscript.run_command('r.report', map='test', units='me', flags='n', output=report_output_path, overwrite=True)
+finally:
+    # Release the temporary region to revert back to the user's default region settings
+    gscript.del_temp_region()
+# Note: The script assumes that 'eu_forest_frag@PERMANENT' and 'eu_forest_frag_clump' refer to the correct raster maps.
--- a/7.create_tiles_and_export_files_GRASS.py
+++ b/7.create_tiles_and_export_files_GRASS.py
+#!/usr/bin/env python3
+import os
+import grass.script as gscript
+# Set region to match the raster
+gscript.run_command('g.region', raster='frag_unique_masked@PERMANENT')
+# Tile the raster
+gscript.run_command('r.tile', input='frag_unique_masked@PERMANENT', output='name_tile', width=20000, height=20000)
+# Define the path to the output directory
+output_dir = r"I:\biocon\Emmanuel_Oceguera\projects\2023_09_Fragmentation\estonia_forest_fragmentation\result_grass\tiles_clump_masked"
+# Get a list of all tiles that start with 'tile_test*'
+tiles = gscript.read_command('g.list', type='raster', pattern='name_tile*', mapset='PERMANENT').strip().splitlines()
+# Export each tile to a GeoTIFF file
+for tile in tiles:
+    # Set region to match the raster before exporting__
+    gscript.run_command('g.region', raster=tile)
+    # Define the output file path
+    output_file = os.path.join(output_dir, f"{tile}.tif")
+    # Export the tile using r.out.gdal
+    print(f"Exporting {tile} to {output_file}")
+    result = gscript.run_command('r.out.gdal', flags='c', input=tile, output=output_file, format='GTiff', nodata=0, type='Int32', overwrite=True, createopt="COMPRESS=DEFLATE")
+    if result != 0:
+        print(f"Failed to export {tile}.")
+    else:
+        print(f"Successfully exported {tile}.")
--- a/8.delete_empty_tiles.py
+++ b/8.delete_empty_tiles.py
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Oct 25 15:35:02 2023
+@author: no67wuwu
+work: we delete the tiles that dont have any information or No data
+"""
+import os
+import subprocess
+from glob import glob
+# Define the function
+def get_stdout(cmd, verbose=False):
+    """
+    send command to the commandline and fetch the return
+    oprion verbose: will also print the return
+    from http://blog.kagesenshi.org/2008/02/teeing-python-subprocesspopen-output.html
+    """
+    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    stdout = []
+    while True:
+        line = p.stdout.readline()
+        line = line.decode('utf-8') # Decode the bytes into a string
+        line = line.replace('\n','').replace('\r','')
+        if line != '':
+            stdout.append(line)
+        if verbose:
+            print(line),
+        if line == '' and p.poll() != None: #p.poll is checking if the process is still running 
+                                            #is it is running it returns None
+            break
+    return stdout
+path = r'S:\Emmanuel_OcegueraConchas\eu_frga_forest\result_grass\eu_forest_frag_unique_masked\*.tif'
+files = glob(path)
+for f in files:
+    cmd = 'gdalinfo -stats %s'
+    cmd_ret = get_stdout(cmd % f)
+    if 'NoData Value' in cmd_ret[-1]:
+        os.remove(f)
+        print("removed file: " + f.split(os.sep)[-1]) # print the file name only (without the path)
\ No newline at end of file
--- a/9.tiles_to_1km_optimized.py
+++ b/9.tiles_to_1km_optimized.py
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Nov 30 13:12:06 2023
+@author: no67wuwu
+work: convert all the tiles to 1km in a very optimized way using parallel processing.
+"""
+# Import libraries
+import os
+from glob import glob
+# library to perform parallel processing
+from concurrent.futures import ThreadPoolExecutor
+# function ro perform the gdalwrap to set a diferenf resolutio; first to tr100 and the to tr1000
+def process_file(f, path, outpath):
+    name = f.split(os.sep)[-1]
+    outname = os.path.join(outpath, name)
+    cmd = 'gdalwarp -tr 1000 1000 -r mode -srcnodata 0 -co "Compression=deflate" {} {}'
+    os.system(cmd.format(f, outname))
+    return 'done with {}'.format(name)
+# define the path
+path = r'S:\Emmanuel_OcegueraConchas\eu_frga_forest\result_grass\eu_forest_frag_unique_masked_tr100'
+outpath = r'S:\Emmanuel_OcegueraConchas\eu_frga_forest\result_grass\eu_forest_frag_unique_masked_tr1000'
+# get all the tiles 
+files = glob(os.path.join(path, '*.tif'))
+# Use ThreadPoolExecutor to parallelize the process
+with ThreadPoolExecutor(max_workers=20) as executor: # adapt to computer capacity
+    results = executor.map(lambda f: process_file(f, path, outpath), files)
+# print results
+for result in results:
+    print(result)