6 changed files with 108 additions and 62 deletions
--- a/.gitea/workflows/build.yaml
+++ b/.gitea/workflows/build.yaml
@ -0,0 +1,39 @@
 # CI workflow: on every push, build the repository's Docker image and publish
 # it to the soaska.ru registry, tagged with the branch name and with "latest".
 name: Actions Build Docker Image
 run-name: ${{ gitea.actor }} is building new image 🚀
 on: [push]
 jobs:
   Explore-Gitea-Actions:
     runs-on: soaska
     steps:
       - run: echo "🎉 The job was automatically triggered by a ${{ gitea.event_name }} event."
       # Node.js is installed before the checkout step because actions/checkout
       # is a JavaScript action and needs a Node.js runtime in the job container.
       - name: Install Node.js
         run: |
           apk add --no-cache nodejs
       - name: Check out repository code
         uses: actions/checkout@v3
       - name: List files in the repository
         run: |
           ls "${{ gitea.workspace }}"
       - run: echo "🍏 This job's status is ${{ job.status }}."
       - name: Install Docker
         run: |
           apk add --no-cache docker
       - name: Start Docker service
         run: |
           dockerd &
           # Poll until the daemon answers instead of trusting a fixed delay;
           # give up after ~30 s and let the final `docker info` fail the step.
           for attempt in $(seq 1 30); do
             if docker info > /dev/null 2>&1; then
               break
             fi
             sleep 1
           done
           docker info
       - name: Build Dockerfile
         run: |
           docker build -t "$(basename "${{ gitea.repository }}")" .
       - name: Upload Docker image to soaska.ru
         # Secrets are handed over through the environment so they never appear
         # in the generated script text or on a command line.
         env:
           REGISTRY_USERNAME: ${{ secrets.username }}
           REGISTRY_PASSWORD: ${{ secrets.password }}
         run: |
           package_name="$(basename "${{ gitea.repository }}")"
           # Docker tags may not contain '/', so branch names such as
           # "feature/x" are flattened to "feature-x".
           branch_name="$(git rev-parse --abbrev-ref HEAD | tr '/' '-')"
           printf '%s' "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USERNAME" --password-stdin soaska.ru
           docker tag "$package_name" "soaska.ru/soaska/$package_name:$branch_name"
           docker tag "$package_name" "soaska.ru/soaska/$package_name:latest"
           docker push "soaska.ru/soaska/$package_name:$branch_name"
           docker push "soaska.ru/soaska/$package_name:latest"
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,6 @@
 /__pycache__
 /logfile.log
 /data
 # Временно
 /docker
--- a/8
+++ b/8
@ -1,15 +1,11 @@
 # Image for the application started via main.py; based on Python 3.11 (slim Debian bullseye).
 FROM python:3.11-slim-bullseye
 WORKDIR /app/
 # Only requirements.txt is copied before `pip install`, so the dependency
 # layer does not depend on the rest of the sources and can be reused from cache.
 COPY requirements.txt .
 RUN pip install -r requirements.txt --no-cache-dir
 # Fetch the Chromium build used by Playwright, then the system packages it needs.
 RUN playwright install chromium
 RUN playwright install-deps
 COPY . .
 # This change moves the fixed command into ENTRYPOINT and leaves CMD as the
 # default argument ("start"); arguments passed to `docker run` replace CMD only.
-
+ENTRYPOINT ["python", "main.py"]
-CMD ["python", "main.py"]
+CMD [ "start" ]
--- a/README.md
+++ b/README.md
@ -14,8 +14,8 @@ cd eljur
 ```
 #### Создание .env файла
-Используйте sample.env, чтобы создать свой .env файл.
+Используйте [sample.env](https://git.soaska.ru/sosiska/eljur/-/blob/main/sample.env?ref_type=heads), чтобы создать свой .env файл.
-env.example - пример, как может выглядеть файл .env
+[env.example](https://git.soaska.ru/sosiska/eljur/-/blob/main/env.example?ref_type=heads) - пример, как может выглядеть файл .env
 ```
 ELJUR_LOGIN=Vasya2005
--- a/main.py
+++ b/main.py
@ -48,8 +48,8 @@ def get_html():
        with open('data/table.html') as html_file:
            val = html_file.read()
        logging.debug("data/table.html got")
-    except:
+    except Exception as error:
-        logging.error(f"cant read data/table.html: {Exception}")
+        logging.error(f"cant read data/table.html: {str(error)}")
    return val
@ -78,6 +78,7 @@ def verify_data():
 def run():
    global new_table, old_table
    new_table = scrape()
    old_table = get_html()
    old_last_block = ""
@ -95,14 +96,19 @@ def run():
            update_html(new_table)
            new_table_soup = BeautifulSoup(new_table, "lxml")
-            new_table = new_table_soup.prettify()
+            new_table_pretty = new_table_soup.prettify()
            old_table_soup = BeautifulSoup(old_table, "lxml")
-            old_table = old_table_soup.prettify()
+            old_table_pretty = old_table_soup.prettify()
-            get_difference(old_table, new_table)
+            get_difference(old_table_pretty, new_table_pretty)
            if old_last_block != new_last_block:
                send_photo("data/screenshot.png")
            else:
                send_photo("sad.png")
            send_document("data/difference.html")
            send_document("data/table.html")
            return
    logging.debug("same result. nothing to do")
--- a/scraper.py
+++ b/scraper.py
@ -6,24 +6,22 @@ from playwright._impl import _api_types
 from os import mkdir
 # Use sync version of Playwright
 p = sync_playwright().start()
 # Launch the browser
 browser = p.chromium.launch()
 logging.debug("browser opened")
 # Open a new browser page
 page = browser.new_page()
 def scrape():
-    # Use sync version of Playwright
+    global page
-
+    try:
-    with sync_playwright() as p:
+        # Open our test file in the opened page
-
+        page.goto(f"https://{DOMAIN}.eljur.ru/authorize?return_uri=%2Fjournal-board-action")
        try:
            # Launch the browser
            browser = p.chromium.launch()
            logging.debug("browser opened")
            # Open a new browser page
            page = browser.new_page()
            # Create a URI for our test file
            # Open our test file in the opened page
            page.goto(f"https://{DOMAIN}.eljur.ru/authorize?return_uri=%2Fjournal-board-action")
            # Log In
            login_field = page.locator('[type="text"]')
            password_field = page.locator('[type="password"]')
@ -33,36 +31,40 @@ def scrape():
            submit_button = page.locator('[type="submit"]')
            submit_button.click()
        except Exception as error:
            logging.debug(f"Error while request: {str(error)}")
            page.goto(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
            pass
-            page.wait_for_url(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
+        page.wait_for_url(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
-    #            page.wait_for_load_state("domcontentloaded")
+        page.wait_for_load_state("domcontentloaded")
        if page.url != f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}" and page.url != f"https://{DOMAIN}.eljur.ru/journal-board-action":
            page.goto(f"https://{DOMAIN}.eljur.ru/journal-board-action")
            page.wait_for_load_state("domcontentloaded")
-            page_content = page.content()
+        page_content = page.content()
-            new_content = page.locator('[class="board-item active"]').first
+        new_content = page.locator('[class="board-item active"]').first
-            try:
+        try:
-                new_content.screenshot(path = "data/screenshot.png")
+            new_content.screenshot(path = "data/screenshot.png")
-            except FileNotFoundError:
+        except FileNotFoundError:
-                mkdir("data")
+            mkdir("data")
-                logging.info("can`t take screenshot")
+            logging.info("can`t take screenshot")
-                logging.info("retry")
+            logging.info("retry")
-                new_content.screenshot(path = "data/screenshot.png")
+            new_content.screenshot(path = "data/screenshot.png")
            except:
                logging.error("can`t take screenshot")
            # Close browser
            browser.close()
            logging.debug("browser closed")
            # Process extracted content with BeautifulSoup
            soup = BeautifulSoup(page_content, features="lxml")
            logging.debug("content extracted")
    #        return '\n'.join(el.strip() for el in str(soup.get_text).split('\n') if el.strip())
            return str(soup.get_text)
        except _api_types.TimeoutError:
            logging.error("connection timed out")
            return " "
        except:
-            logging.error(f"Error while request: {Exception}")
+            logging.error("can`t take screenshot")
-            return " "
+
        # Process extracted content with BeautifulSoup
        soup = BeautifulSoup(page_content, features="lxml")
        logging.debug("content extracted")
 #        return '\n'.join(el.strip() for el in str(soup.get_text).split('\n') if el.strip())
        return str(soup.get_text)
    except _api_types.TimeoutError:
        logging.error("connection timed out")
        return " "
    except Exception as error:
        logging.error(f"Error while request: {str(error)}")
        return " "