Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 91 additions & 15 deletions phpblm.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,67 +2,143 @@

/**
* Parser for Rightmove's lovely BLM files.
*
* BLM (Bulk Load Media) format structure:
* - Sections are delimited by '#' characters
* - Header section contains EOF (End Of Field) and EOR (End Of Record) delimiters
* - Definition section lists field names
* - Data section contains property records
*/

class phpblm {
// Raw contents of the BLM file, as read by the constructor.
private $blm;
// Header entries parsed by splitPieces(), keyed by header name (includes 'EOF' and 'EOR').
private $header = array();
// Field names taken from the definition section, in file order.
private $def = array();
// Parsed rows; each row is an array keyed by field name.
private $data = array();


// Maximum file size: 50MB to prevent memory exhaustion
const MAX_FILE_SIZE = 52428800;

/**
 * Loads a BLM file from disk and parses it.
 *
 * @param string $file Path to the BLM file.
 *
 * @throws InvalidArgumentException if the path is not a non-empty string, does not
 *                                  exist, is not a regular file, or is not readable.
 * @throws RuntimeException         if the size cannot be determined, the file is larger
 *                                  than MAX_FILE_SIZE, reading fails, or parsing fails.
 */
public function __construct($file) {
    // Compare against '' instead of using empty(): empty('0') is true, which
    // would wrongly reject a file literally named "0".
    if (!is_string($file) || $file === '') {
        throw new InvalidArgumentException('File path must be a non-empty string');
    }

    if (!file_exists($file)) {
        throw new InvalidArgumentException("File does not exist: $file");
    }

    // file_exists() is also true for directories; reject them here so the
    // caller gets a clear error instead of a confusing parse failure later.
    if (!is_file($file)) {
        throw new InvalidArgumentException("Path is not a regular file: $file");
    }

    if (!is_readable($file)) {
        throw new InvalidArgumentException("File is not readable: $file");
    }

    // Check the size before reading, since the whole file is loaded into memory.
    $fileSize = filesize($file);
    if ($fileSize === false) {
        throw new RuntimeException("Cannot determine file size: $file");
    }

    if ($fileSize > self::MAX_FILE_SIZE) {
        throw new RuntimeException("File too large. Maximum size: " . self::MAX_FILE_SIZE . " bytes");
    }

    $this->blm = file_get_contents($file);
    if ($this->blm === false) {
        throw new RuntimeException("Failed to read file: $file");
    }

    $this->splitPieces();
}

// Return specific field from row
/**
 * Looks up one field of one parsed row.
 *
 * @param string $data Field name to read.
 * @param int    $row  Index of the row to read from.
 *
 * @return mixed The stored value for that field.
 *
 * @throws OutOfBoundsException if the row or the field is not set.
 */
public function getData($data, $row) {
    // The row is checked first, so a bad row index is reported before a bad field name.
    if (isset($this->data[$row]) === false) {
        throw new OutOfBoundsException("Row index $row does not exist");
    }

    $record = $this->data[$row];

    if (isset($record[$data]) === false) {
        throw new OutOfBoundsException("Field '$data' does not exist in row $row");
    }

    return $record[$data];
}

// Return header info
/**
 * Reads a single value from the parsed header.
 *
 * @param string $hdr Header name.
 *
 * @return mixed The stored header value.
 *
 * @throws OutOfBoundsException if no header with that name is set.
 */
public function getHeader($hdr) {
    if (isset($this->header[$hdr])) {
        return $this->header[$hdr];
    }

    throw new OutOfBoundsException("Header field '$hdr' does not exist");
}

// This will return the actual number of properties, regardless of what the
// header might say. To retrieve the header value (if present), use:
// $blm->getHeader('Property Count');
/**
 * Counts the rows that were parsed from the data section.
 *
 * @return int Number of entries in the parsed row list.
 */
public function propCount() {
    $rows = $this->data;

    return count($rows);
}

/**
 * Returns every parsed row.
 *
 * @return array List of rows, each an array keyed by field name.
 */
public function properties() {
    $rows = $this->data;

    return $rows;
}

// Splits the BLM data into constituent parts
/**
 * Splits the raw BLM text into header entries, field definitions and rows.
 *
 * The text is split on '#'. Piece 2 is read as the header, piece 4 as the
 * definition list and piece 6 as the data section.
 *
 * @throws RuntimeException if there are fewer than 7 '#'-separated pieces, or
 *                          if the EOF/EOR delimiter is missing or empty.
 */
private function splitPieces() {
    $pieces = explode("#", $this->blm);

    // Pieces 2, 4 and 6 are indexed below, so at least 7 must exist.
    if (count($pieces) < 7) {
        throw new RuntimeException("Invalid BLM format: insufficient sections (expected at least 7, got " . count($pieces) . ")");
    }

    // Header: one "Name : value" entry per line (includes the EOF/EOR delimiters).
    foreach (explode("\n", trim($pieces[2])) as $line) {
        $line = str_replace("'", "", $line); // Remove quotes on EOF/EOR

        // Limit to 2 parts in case the value itself contains ' : '.
        $parts = explode(" : ", $line, 2);

        // Lines without a " : " separator are ignored rather than read out of bounds.
        if (count($parts) === 2) {
            $this->header[trim($parts[0])] = trim($parts[1]);
        }
    }

    // Both delimiters are passed to explode() below, which cannot work with a
    // missing or empty separator.
    foreach (array('EOF', 'EOR') as $name) {
        if (!isset($this->header[$name])) {
            throw new RuntimeException("Invalid BLM format: $name delimiter not found in header");
        }

        if ($this->header[$name] === '') {
            throw new RuntimeException("Invalid BLM format: $name delimiter is empty");
        }
    }

    $eof = $this->header['EOF'];
    $eor = $this->header['EOR'];

    // Definitions: field names separated by the EOF delimiter.
    foreach (explode($eof, trim($pieces[4])) as $fieldName) {
        $this->def[] = $fieldName;
    }

    // Data: records separated by EOR, fields within a record separated by EOF.
    // NOTE(review): if the data section ends with the EOR delimiter, explode()
    // yields a trailing empty record, which is stored as a row holding a single
    // empty field. Left as-is so existing row counts do not change.
    foreach (explode($eor, trim($pieces[6])) as $record) {
        // Start from an empty row each time so values never leak between records.
        $row_arr = array();

        foreach (explode($eof, trim($record)) as $j => $value) {
            // Fields beyond the end of the definition list have no name; skip them.
            if (isset($this->def[$j])) {
                $row_arr[$this->def[$j]] = $value;
            }
        }

        $this->data[] = $row_arr;
    }
}
Expand Down